mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
c7250ce1db
Adding support for __tile_stream_loadd intrinsic. Reviewed By: LuoYuanke Differential Revision: https://reviews.llvm.org/D103784
308 lines
9.7 KiB
C++
308 lines
9.7 KiB
C++
//===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file Pass to config the shape of AMX physical registers
|
|
/// AMX register need to be configured before use. Before FastRegAllocation pass
|
|
/// the ldtilecfg instruction is inserted, however at that time we don't
|
|
/// know the shape of each physical tile registers, because the register
|
|
/// allocation is not done yet. This pass runs after register allocation
|
|
/// pass. It collects the shape information of each physical tile register
|
|
/// and store the shape in the stack slot that is allocated for load config
|
|
/// to tile config register.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "X86.h"
|
|
#include "X86InstrBuilder.h"
|
|
#include "X86MachineFunctionInfo.h"
|
|
#include "X86RegisterInfo.h"
|
|
#include "X86Subtarget.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "fasttileconfig"
|
|
|
|
namespace {
|
|
|
|
class X86FastTileConfig : public MachineFunctionPass {
|
|
// context
|
|
MachineFunction *MF = nullptr;
|
|
const X86Subtarget *ST = nullptr;
|
|
const TargetRegisterInfo *TRI = nullptr;
|
|
const TargetInstrInfo *TII = nullptr;
|
|
MachineRegisterInfo *MRI = nullptr;
|
|
|
|
MachineInstr *getTileConfigPoint();
|
|
void tileConfig();
|
|
|
|
public:
|
|
X86FastTileConfig() : MachineFunctionPass(ID) {}
|
|
|
|
bool fastTileConfig();
|
|
bool isTileLoad(MachineInstr &MI);
|
|
bool isTileStore(MachineInstr &MI);
|
|
bool isAMXInstr(MachineInstr &MI);
|
|
void getTileStoreShape(MachineInstr &MI,
|
|
SmallVector<MachineOperand *> &ShapedTiles);
|
|
|
|
MachineInstr *getKeyAMXInstr(MachineInstr *MI);
|
|
void getTileShapesCfg(MachineInstr *MI,
|
|
SmallVector<MachineOperand *> &ShapedTiles);
|
|
void getShapeCfgInstrs(MachineInstr *MI,
|
|
std::map<unsigned, MachineInstr *> &RowCfgs,
|
|
std::map<unsigned, MachineInstr *> &ColCfgs);
|
|
|
|
/// Return the pass name.
|
|
StringRef getPassName() const override {
|
|
return "Fast Tile Register Configure";
|
|
}
|
|
|
|
void materializeTileCfg(MachineInstr *MI);
|
|
|
|
void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles,
|
|
std::map<unsigned, MachineInstr *> &RowCfgs,
|
|
std::map<unsigned, MachineInstr *> &ColCfgs);
|
|
|
|
/// Perform register allocation.
|
|
bool runOnMachineFunction(MachineFunction &MFunc) override;
|
|
|
|
MachineFunctionProperties getRequiredProperties() const override {
|
|
return MachineFunctionProperties().set(
|
|
MachineFunctionProperties::Property::NoPHIs);
|
|
}
|
|
|
|
static char ID;
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
char X86FastTileConfig::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
|
|
"Fast Tile Register Configure", false, false)
|
|
INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
|
|
"Fast Tile Register Configure", false, false)
|
|
|
|
static bool isTilePhysReg(MachineOperand &Op) {
|
|
if (!Op.isReg())
|
|
return false;
|
|
|
|
Register Reg = Op.getReg();
|
|
if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
static unsigned getTilePhysRegIdx(MachineOperand *Op) {
|
|
assert(isTilePhysReg(*Op) && "Tile Operand is invalid");
|
|
return Op->getReg() - X86::TMM0;
|
|
}
|
|
|
|
static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) {
|
|
unsigned Offset = 48 + TIdx;
|
|
MI->getOperand(3).ChangeToImmediate(Offset);
|
|
}
|
|
|
|
static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) {
|
|
unsigned Offset = 16 + TIdx * 2;
|
|
MI->getOperand(3).ChangeToImmediate(Offset);
|
|
}
|
|
|
|
bool X86FastTileConfig::isTileLoad(MachineInstr &MI) {
|
|
return MI.getOpcode() == X86::PTILELOADDV ||
|
|
MI.getOpcode() == X86::PTILELOADDT1V;
|
|
}
|
|
bool X86FastTileConfig::isTileStore(MachineInstr &MI) {
|
|
return MI.getOpcode() == X86::PTILESTOREDV;
|
|
}
|
|
bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) {
|
|
// TODO: May need to handle some special nontile amx instrucion.
|
|
if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr())
|
|
return false;
|
|
|
|
for (MachineOperand &MO : MI.operands())
|
|
if (isTilePhysReg(MO))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) {
|
|
auto Cfg = MachineBasicBlock::iterator(MI);
|
|
MachineBasicBlock *MBB = MI->getParent();
|
|
MachineInstr *KeyMI = nullptr;
|
|
int KeyAMXNum = 0;
|
|
|
|
for (auto II = Cfg; II != MBB->end(); II++) {
|
|
if (isTileLoad(*II)) {
|
|
KeyMI = &*II;
|
|
continue;
|
|
}
|
|
|
|
if (isTileStore(*II)) {
|
|
assert(KeyMI && "Key AMX Should be found before!");
|
|
break;
|
|
}
|
|
|
|
if (isAMXInstr(*II)) {
|
|
assert((KeyAMXNum == 0) && "Too many Key AMX instruction!");
|
|
KeyAMXNum++;
|
|
KeyMI = &*II;
|
|
}
|
|
}
|
|
assert(KeyMI && "There must be an AMX instruction.");
|
|
return KeyMI;
|
|
}
|
|
|
|
// Orderly get the tiles in key amx instruction, uses before defs.
|
|
void X86FastTileConfig::getTileShapesCfg(
|
|
MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) {
|
|
MachineInstr *KeyMI = getKeyAMXInstr(CfgMI);
|
|
|
|
SmallVector<MachineOperand *> DefTiles;
|
|
for (MachineOperand &MO : KeyMI->operands()) {
|
|
if (!isTilePhysReg(MO))
|
|
continue;
|
|
if (MO.isDef())
|
|
DefTiles.push_back(&MO);
|
|
else
|
|
ShapedTiles.push_back(&MO);
|
|
}
|
|
ShapedTiles.append(DefTiles);
|
|
}
|
|
|
|
// We pre-config the shapes at position named with "amx.tmm.N.shape.row* and
|
|
// amx.shape.N.col*" at pass "Pre AMX Tile Config".
|
|
// The 'N' implies the order of tiles in key amx intrinsic.
|
|
void X86FastTileConfig::getShapeCfgInstrs(
|
|
MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs,
|
|
std::map<unsigned, MachineInstr *> &ColCfgs) {
|
|
auto Cfg = MachineBasicBlock::iterator(MI);
|
|
MachineBasicBlock *MBB = MI->getParent();
|
|
|
|
for (auto II = Cfg; II != MBB->begin(); II--) {
|
|
if (isAMXInstr(*II) || II->isTerminator() || II->isCall())
|
|
break;
|
|
if (!II->mayStore() || !II->hasOneMemOperand())
|
|
continue;
|
|
const Value *MemPtr = II->memoperands()[0]->getValue();
|
|
if (!MemPtr)
|
|
continue;
|
|
|
|
StringRef Name = MemPtr->getName();
|
|
if (!Name.startswith("amx.tmm."))
|
|
continue;
|
|
|
|
// Get the 'N'th tile shape config in key amx instruction.
|
|
auto N = Name.find(".shape");
|
|
StringRef STileIdx = Name.slice(8, N);
|
|
unsigned Idx;
|
|
STileIdx.getAsInteger(10, Idx);
|
|
|
|
// And related them with their store instructions.
|
|
if (Name.contains("row"))
|
|
RowCfgs[Idx] = &*II;
|
|
else if (Name.contains("col"))
|
|
ColCfgs[Idx] = &*II;
|
|
else
|
|
llvm_unreachable("Invalid tile shape info!");
|
|
}
|
|
assert((RowCfgs.size() == ColCfgs.size()) &&
|
|
"The number of tile row and col must be equal!");
|
|
}
|
|
|
|
// Here is the data format for the tile config.
|
|
// 0 palette = 1 now.
|
|
// 1 start_row = 0 now.
|
|
// 2-15 reserved, must be zero
|
|
// 16-17 tile0.colsb Tile 0 bytes per row.
|
|
// 18-19 tile1.colsb Tile 1 bytes per row.
|
|
// 20-21 tile2.colsb Tile 2 bytes per row.
|
|
// ... (sequence continues)
|
|
// 30-31 tile7.colsb Tile 7 bytes per row.
|
|
// 32-47 reserved, must be zero
|
|
// 48 tile0.rows Tile 0 rows.
|
|
// 49 tile1.rows Tile 1 rows.
|
|
// 50 tile2.rows Tile 2 rows.
|
|
// ... (sequence continues)
|
|
// 55 tile7.rows Tile 7 rows.
|
|
// 56-63 reserved, must be zero
|
|
void X86FastTileConfig::rewriteTileCfg(
|
|
SmallVector<MachineOperand *> &ShapedTiles,
|
|
std::map<unsigned, MachineInstr *> &RowCfgs,
|
|
std::map<unsigned, MachineInstr *> &ColCfgs) {
|
|
assert((RowCfgs.size() == ShapedTiles.size()) &&
|
|
"The number of tile shapes not equal with the number of tiles!");
|
|
|
|
// Orderly get the tiles and adjust the shape config.
|
|
for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) {
|
|
MachineOperand *MO = ShapedTiles[I];
|
|
unsigned TmmIdx = getTilePhysRegIdx(MO);
|
|
if (I == TmmIdx)
|
|
continue;
|
|
adjustRowCfg(TmmIdx, RowCfgs[I]);
|
|
adjustColCfg(TmmIdx, ColCfgs[I]);
|
|
}
|
|
}
|
|
|
|
// We have already preconfig the shapes before fast register allocation at
|
|
// X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register
|
|
// allocation, the shapes pre-written before may not rightly corresponding
|
|
// to the correct tmm registers, so we need adjust them.
|
|
void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) {
|
|
SmallVector<MachineOperand *> ShapedTiles;
|
|
std::map<unsigned, MachineInstr *> RowCfgs;
|
|
std::map<unsigned, MachineInstr *> ColCfgs;
|
|
|
|
// Orderly keep the tile uses and def in ShapedTiles;
|
|
getTileShapesCfg(CfgMI, ShapedTiles);
|
|
assert(ShapedTiles.size() && "Not find shapes config!");
|
|
|
|
getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs);
|
|
|
|
rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs);
|
|
}
|
|
|
|
bool X86FastTileConfig::fastTileConfig() {
|
|
bool Changed = false;
|
|
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
SmallVector<MachineInstr *, 2> CFGs;
|
|
for (MachineInstr &MI : MBB)
|
|
if (MI.getOpcode() == X86::PLDTILECFGV)
|
|
CFGs.push_back(&MI);
|
|
for (auto *MI : CFGs)
|
|
materializeTileCfg(MI);
|
|
if (!CFGs.empty())
|
|
Changed = true;
|
|
}
|
|
return Changed;
|
|
}
|
|
|
|
bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
|
|
MF = &MFunc;
|
|
MRI = &MFunc.getRegInfo();
|
|
ST = &MFunc.getSubtarget<X86Subtarget>();
|
|
TRI = ST->getRegisterInfo();
|
|
TII = MFunc.getSubtarget().getInstrInfo();
|
|
|
|
return fastTileConfig();
|
|
}
|
|
|
|
FunctionPass *llvm::createX86FastTileConfigPass() {
|
|
return new X86FastTileConfig();
|
|
}
|