mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Refactor Subtarget classes
Summary: This is a follow-up to r335942.
- Merge SISubtarget into AMDGPUSubtarget and rename to GCNSubtarget
- Rename AMDGPUCommonSubtarget to AMDGPUSubtarget
- Merge R600Subtarget::Generation and GCNSubtarget::Generation into AMDGPUSubtarget::Generation.

Reviewers: arsenm, jvesely
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D49037
llvm-svn: 336851
This commit is contained in:
parent
64d037b323
commit
e236141513
@ -411,23 +411,23 @@ def FeatureGCN : SubtargetFeature<"gcn",
|
||||
"GCN or newer GPU"
|
||||
>;
|
||||
|
||||
class AMDGPUSubtargetFeatureGeneration <string Value,
|
||||
class GCNSubtargetFeatureGeneration <string Value,
|
||||
list<SubtargetFeature> Implies> :
|
||||
SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
|
||||
SubtargetFeatureGeneration <Value, "GCNSubtarget", Implies>;
|
||||
|
||||
def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
|
||||
FeatureWavefrontSize64, FeatureGCN,
|
||||
FeatureLDSBankCount32, FeatureMovrel]
|
||||
>;
|
||||
|
||||
def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
|
||||
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
|
||||
FeatureCIInsts, FeatureMovrel]
|
||||
>;
|
||||
|
||||
def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
||||
@ -438,7 +438,7 @@ def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS"
|
||||
]
|
||||
>;
|
||||
|
||||
def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
|
||||
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
||||
|
@ -219,7 +219,7 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
|
||||
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
|
||||
bool HasFlat = ST.hasFlatAddressSpace();
|
||||
bool HasApertureRegs = ST.hasApertureRegs();
|
||||
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
|
||||
|
@ -18,7 +18,7 @@ namespace llvm {
|
||||
|
||||
class Function;
|
||||
class raw_ostream;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
class TargetMachine;
|
||||
class TargetRegisterClass;
|
||||
class TargetRegisterInfo;
|
||||
|
@ -67,7 +67,7 @@ using namespace llvm::AMDGPU;
|
||||
// instructions to run at the double precision rate for the device so it's
|
||||
// probably best to just report no single precision denormals.
|
||||
static uint32_t getFPMode(const MachineFunction &F) {
|
||||
const SISubtarget& ST = F.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget& ST = F.getSubtarget<GCNSubtarget>();
|
||||
// TODO: Is there any real use for the flush in only / flush out only modes?
|
||||
|
||||
uint32_t FP32Denormals =
|
||||
@ -197,7 +197,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
|
||||
amd_kernel_code_t KernelCode;
|
||||
if (STM.isAmdCodeObjectV2(MF->getFunction())) {
|
||||
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
|
||||
@ -255,14 +255,14 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
||||
}
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
|
||||
if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
|
||||
SmallString<128> SymbolName;
|
||||
getNameWithPrefix(SymbolName, &MF->getFunction()),
|
||||
getTargetStreamer()->EmitAMDGPUSymbolType(
|
||||
SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
|
||||
}
|
||||
const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
|
||||
if (STI.dumpCode()) {
|
||||
// Disassemble function name label to text.
|
||||
DisasmLines.push_back(MF->getName().str() + ":");
|
||||
@ -274,7 +274,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
|
||||
const AMDGPUSubtarget &STI = MBB.getParent()->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
|
||||
if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
|
||||
// Write a line for the basic block label if it is not only fallthrough.
|
||||
DisasmLines.push_back(
|
||||
@ -399,7 +399,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
SetupMachineFunction(MF);
|
||||
|
||||
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
MCContext &Context = getObjFileLowering().getContext();
|
||||
// FIXME: This should be an explicit check for Mesa.
|
||||
if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
|
||||
@ -440,7 +440,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
|
||||
emitCommonFunctionComments(
|
||||
Info.NumVGPR,
|
||||
Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
|
||||
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
|
||||
Info.PrivateSegmentSize,
|
||||
getFunctionCodeSize(MF), MFI);
|
||||
return false;
|
||||
@ -475,7 +475,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
OutStreamer->emitRawComment(
|
||||
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
|
||||
|
||||
if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
|
||||
if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
|
||||
OutStreamer->emitRawComment(
|
||||
" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
|
||||
Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
|
||||
@ -526,7 +526,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = STM.getInstrInfo();
|
||||
|
||||
uint64_t CodeSize = 0;
|
||||
@ -558,7 +558,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
|
||||
}
|
||||
|
||||
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
|
||||
const SISubtarget &ST) const {
|
||||
const GCNSubtarget &ST) const {
|
||||
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
|
||||
UsesVCC, UsesFlatScratch);
|
||||
}
|
||||
@ -568,7 +568,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
||||
SIFunctionResourceInfo Info;
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
@ -812,7 +812,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
MF.getFunction().getContext().diagnose(DiagStackSize);
|
||||
}
|
||||
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SIInstrInfo *TII = STM.getInstrInfo();
|
||||
const SIRegisterInfo *RI = &TII->getRegisterInfo();
|
||||
@ -927,7 +927,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
ProgInfo.DX10Clamp = STM.enableDX10Clamp();
|
||||
|
||||
unsigned LDSAlignShift;
|
||||
if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
|
||||
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
// LDS is allocated in 64 dword blocks.
|
||||
LDSAlignShift = 8;
|
||||
} else {
|
||||
@ -1000,7 +1000,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
const SIProgramInfo &CurrentProgramInfo) {
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
|
||||
|
||||
@ -1129,7 +1129,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
||||
const SIProgramInfo &CurrentProgramInfo,
|
||||
const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
|
||||
|
||||
|
@ -34,7 +34,7 @@ namespace llvm {
|
||||
class AMDGPUMachineFunction;
|
||||
class AMDGPUTargetStreamer;
|
||||
class MCOperand;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
class AMDGPUAsmPrinter final : public AsmPrinter {
|
||||
private:
|
||||
@ -50,7 +50,7 @@ private:
|
||||
bool HasDynamicallySizedStack = false;
|
||||
bool HasRecursion = false;
|
||||
|
||||
int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
|
||||
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
|
||||
};
|
||||
|
||||
SIProgramInfo CurrentProgramInfo;
|
||||
|
@ -95,10 +95,10 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||
return false;
|
||||
|
||||
MachineFunction &MF = MIRBuilder.getMF();
|
||||
const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
|
||||
const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
|
@ -132,11 +132,11 @@ def RetCC_AMDGPU_Func : CallingConv<[
|
||||
]>;
|
||||
|
||||
def CC_AMDGPU : CallingConv<[
|
||||
CCIf<"static_cast<const AMDGPUSubtarget&>"
|
||||
CCIf<"static_cast<const GCNSubtarget&>"
|
||||
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
|
||||
"AMDGPUSubtarget::SOUTHERN_ISLANDS",
|
||||
CCDelegateTo<CC_SI>>,
|
||||
CCIf<"static_cast<const AMDGPUSubtarget&>"
|
||||
CCIf<"static_cast<const GCNSubtarget&>"
|
||||
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
|
||||
"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
|
||||
CCDelegateTo<CC_AMDGPU_Func>>
|
||||
|
@ -57,7 +57,7 @@ static cl::opt<bool> WidenLoads(
|
||||
|
||||
class AMDGPUCodeGenPrepare : public FunctionPass,
|
||||
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
|
||||
const SISubtarget *ST = nullptr;
|
||||
const GCNSubtarget *ST = nullptr;
|
||||
DivergenceAnalysis *DA = nullptr;
|
||||
Module *Mod = nullptr;
|
||||
bool HasUnsafeFPMath = false;
|
||||
@ -890,7 +890,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
|
||||
return false;
|
||||
|
||||
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
|
||||
ST = &TM.getSubtarget<SISubtarget>(F);
|
||||
ST = &TM.getSubtarget<GCNSubtarget>(F);
|
||||
DA = &getAnalysis<DivergenceAnalysis>();
|
||||
HasUnsafeFPMath = hasUnsafeFPMath(F);
|
||||
AMDGPUASI = TM.getAMDGPUAS();
|
||||
|
@ -203,7 +203,7 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
|
||||
Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
|
||||
const Function &F = MF.getFunction();
|
||||
@ -233,7 +233,7 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
|
||||
Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
|
||||
|
||||
if (!STM.debuggerSupported())
|
||||
|
@ -71,7 +71,7 @@ namespace {
|
||||
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
|
||||
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
|
||||
// make the right decision when generating code for different targets.
|
||||
const AMDGPUSubtarget *Subtarget;
|
||||
const GCNSubtarget *Subtarget;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
bool EnableLateStructurizeCFG;
|
||||
|
||||
@ -274,7 +274,7 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
|
||||
Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
|
||||
Subtarget = &MF.getSubtarget<GCNSubtarget>();
|
||||
return SelectionDAGISel::runOnMachineFunction(MF);
|
||||
}
|
||||
|
||||
@ -316,7 +316,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
|
||||
}
|
||||
|
||||
const SIRegisterInfo *TRI
|
||||
= static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
|
||||
= static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
|
||||
return TRI->getPhysRegClass(Reg);
|
||||
}
|
||||
|
||||
@ -1397,7 +1397,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
|
||||
return false;
|
||||
|
||||
SDLoc SL(ByteOffsetNode);
|
||||
AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
|
||||
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
|
||||
int64_t ByteOffset = C->getSExtValue();
|
||||
int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
|
||||
|
||||
@ -1664,7 +1664,7 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
|
||||
return true;
|
||||
|
||||
if (VT == MVT::i64) {
|
||||
auto ST = static_cast<const SISubtarget *>(Subtarget);
|
||||
auto ST = static_cast<const GCNSubtarget *>(Subtarget);
|
||||
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||
return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
|
||||
|
@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
||||
const AMDGPUCommonSubtarget &STI)
|
||||
const AMDGPUSubtarget &STI)
|
||||
: TargetLowering(TM), Subtarget(&STI) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
|
||||
// Lower floating point store/load to integer store/load to reduce the number
|
||||
@ -3939,8 +3939,8 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
|
||||
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
|
||||
const MachineFunction &MF, const ImplicitParameter Param) const {
|
||||
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
||||
const AMDGPUCommonSubtarget &ST =
|
||||
AMDGPUCommonSubtarget::get(getTargetMachine(), MF.getFunction());
|
||||
const AMDGPUSubtarget &ST =
|
||||
AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
|
||||
unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
|
||||
unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
|
||||
uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
|
||||
@ -4242,8 +4242,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
|
||||
switch (IID) {
|
||||
case Intrinsic::amdgcn_mbcnt_lo:
|
||||
case Intrinsic::amdgcn_mbcnt_hi: {
|
||||
const SISubtarget &ST =
|
||||
DAG.getMachineFunction().getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST =
|
||||
DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
|
||||
// These return at most the wavefront size - 1.
|
||||
unsigned Size = Op.getValueType().getSizeInBits();
|
||||
Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
|
||||
|
@ -23,12 +23,12 @@
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUMachineFunction;
|
||||
class AMDGPUCommonSubtarget;
|
||||
class AMDGPUSubtarget;
|
||||
struct ArgDescriptor;
|
||||
|
||||
class AMDGPUTargetLowering : public TargetLowering {
|
||||
private:
|
||||
const AMDGPUCommonSubtarget *Subtarget;
|
||||
const AMDGPUSubtarget *Subtarget;
|
||||
|
||||
/// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
|
||||
/// legalized from a smaller type VT. Need to match pre-legalized type because
|
||||
@ -125,7 +125,7 @@ protected:
|
||||
void analyzeFormalArgumentsCompute(CCState &State,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins) const;
|
||||
public:
|
||||
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);
|
||||
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
|
||||
|
||||
bool mayIgnoreSignedZero(SDValue Op) const {
|
||||
if (getTargetMachine().Options.NoSignedZerosFPMath)
|
||||
|
@ -26,7 +26,7 @@ using namespace llvm;
|
||||
// Pin the vtable to this file.
|
||||
//void AMDGPUInstrInfo::anchor() {}
|
||||
|
||||
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }
|
||||
AMDGPUInstrInfo::AMDGPUInstrInfo(const GCNSubtarget &ST) { }
|
||||
|
||||
|
||||
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
|
||||
|
@ -22,14 +22,14 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
class GCNSubtarget;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineInstrBuilder;
|
||||
|
||||
class AMDGPUInstrInfo {
|
||||
public:
|
||||
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
|
||||
explicit AMDGPUInstrInfo(const GCNSubtarget &st);
|
||||
|
||||
static bool isUniformMMO(const MachineMemOperand *MMO);
|
||||
};
|
||||
|
@ -37,11 +37,13 @@
|
||||
using namespace llvm;
|
||||
|
||||
#define GET_GLOBALISEL_IMPL
|
||||
#define AMDGPUSubtarget GCNSubtarget
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
#undef GET_GLOBALISEL_IMPL
|
||||
#undef AMDGPUSubtarget
|
||||
|
||||
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
|
||||
const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
|
||||
const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
|
||||
const AMDGPUTargetMachine &TM)
|
||||
: InstructionSelector(), TII(*STI.getInstrInfo()),
|
||||
TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
|
||||
@ -447,7 +449,7 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
|
||||
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
MachineFunction *MF = BB->getParent();
|
||||
const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
unsigned DstReg = I.getOperand(0).getReg();
|
||||
const DebugLoc &DL = I.getDebugLoc();
|
||||
|
@ -22,26 +22,27 @@
|
||||
|
||||
namespace {
|
||||
#define GET_GLOBALISEL_PREDICATE_BITSET
|
||||
#define AMDGPUSubtarget GCNSubtarget
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
#undef GET_GLOBALISEL_PREDICATE_BITSET
|
||||
#undef AMDGPUSubtarget
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUInstrInfo;
|
||||
class AMDGPURegisterBankInfo;
|
||||
class AMDGPUSubtarget;
|
||||
class GCNSubtarget;
|
||||
class MachineInstr;
|
||||
class MachineOperand;
|
||||
class MachineRegisterInfo;
|
||||
class SIInstrInfo;
|
||||
class SIMachineFunctionInfo;
|
||||
class SIRegisterInfo;
|
||||
class SISubtarget;
|
||||
|
||||
class AMDGPUInstructionSelector : public InstructionSelector {
|
||||
public:
|
||||
AMDGPUInstructionSelector(const SISubtarget &STI,
|
||||
AMDGPUInstructionSelector(const GCNSubtarget &STI,
|
||||
const AMDGPURegisterBankInfo &RBI,
|
||||
const AMDGPUTargetMachine &TM);
|
||||
|
||||
@ -91,11 +92,13 @@ private:
|
||||
const SIRegisterInfo &TRI;
|
||||
const AMDGPURegisterBankInfo &RBI;
|
||||
const AMDGPUTargetMachine &TM;
|
||||
const SISubtarget &STI;
|
||||
const GCNSubtarget &STI;
|
||||
bool EnableLateStructurizeCFG;
|
||||
#define GET_GLOBALISEL_PREDICATES_DECL
|
||||
#define AMDGPUSubtarget GCNSubtarget
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
#undef GET_GLOBALISEL_PREDICATES_DECL
|
||||
#undef AMDGPUSubtarget
|
||||
|
||||
#define GET_GLOBALISEL_TEMPORARIES_DECL
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
|
@ -24,7 +24,7 @@
|
||||
using namespace llvm;
|
||||
using namespace LegalizeActions;
|
||||
|
||||
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,
|
||||
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
const GCNTargetMachine &TM) {
|
||||
using namespace TargetOpcode;
|
||||
|
||||
|
@ -21,12 +21,12 @@ namespace llvm {
|
||||
|
||||
class GCNTargetMachine;
|
||||
class LLVMContext;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
/// This class provides the information for the target register banks.
|
||||
class AMDGPULegalizerInfo : public LegalizerInfo {
|
||||
public:
|
||||
AMDGPULegalizerInfo(const SISubtarget &ST,
|
||||
AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
const GCNTargetMachine &TM);
|
||||
};
|
||||
} // End llvm namespace.
|
||||
|
@ -124,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
|
||||
if (!CI)
|
||||
continue;
|
||||
|
||||
Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
|
||||
Changed |= AMDGPUSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
|
||||
auto &TPC = getAnalysis<TargetPassConfig>();
|
||||
|
||||
const TargetMachine &TM = TPC.getTM<TargetMachine>();
|
||||
const SISubtarget &ST = TM.getSubtarget<SISubtarget>(F);
|
||||
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
|
||||
LLVMContext &Ctx = F.getParent()->getContext();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
BasicBlock &EntryBlock = *F.begin();
|
||||
|
@ -204,7 +204,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
|
||||
|
||||
bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
|
||||
MCOperand &MCOp) const {
|
||||
const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
|
||||
AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
|
||||
return MCInstLowering.lowerOperand(MO, MCOp);
|
||||
}
|
||||
@ -243,7 +243,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
if (emitPseudoExpansionLowering(*OutStreamer, MI))
|
||||
return;
|
||||
|
||||
const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
|
||||
AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
|
||||
|
||||
StringRef Err;
|
||||
|
@ -2871,7 +2871,7 @@ static void checkRegOnlyPHIInputs(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
TRI = ST.getRegisterInfo();
|
||||
MRI = &(MF.getRegInfo());
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
class AMDGPUMachineFunction : public MachineFunctionInfo {
|
||||
/// A map to keep track of local memory objects and their offsets within the
|
||||
|
@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
|
||||
IsAMDGCN = TT.getArch() == Triple::amdgcn;
|
||||
IsAMDHSA = TT.getOS() == Triple::AMDHSA;
|
||||
|
||||
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
|
||||
if (!ST.isPromoteAllocaEnabled())
|
||||
return false;
|
||||
|
||||
@ -175,7 +175,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
|
||||
std::pair<Value *, Value *>
|
||||
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
|
||||
const Function &F = *Builder.GetInsertBlock()->getParent();
|
||||
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
|
||||
|
||||
if (!IsAMDHSA) {
|
||||
Function *LocalSizeYFn
|
||||
@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
|
||||
}
|
||||
|
||||
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
|
||||
const AMDGPUCommonSubtarget &ST =
|
||||
AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
|
||||
const AMDGPUSubtarget &ST =
|
||||
AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
|
||||
Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
|
||||
|
||||
switch (N) {
|
||||
@ -603,7 +603,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
|
||||
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
|
||||
|
||||
FunctionType *FTy = F.getFunctionType();
|
||||
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
|
||||
|
||||
// If the function has any arguments in the local address space, then it's
|
||||
// possible these arguments require the entire local memory space, so
|
||||
@ -730,7 +730,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
if (!SufficientLDS)
|
||||
return false;
|
||||
|
||||
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
|
||||
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
|
||||
|
||||
const DataLayout &DL = Mod->getDataLayout();
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
class GCNSubtarget;
|
||||
class TargetInstrInfo;
|
||||
|
||||
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
|
||||
|
@ -34,12 +34,14 @@ using namespace llvm;
|
||||
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#define AMDGPUSubtarget GCNSubtarget
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#undef AMDGPUSubtarget
|
||||
#include "R600GenSubtargetInfo.inc"
|
||||
|
||||
AMDGPUSubtarget::~AMDGPUSubtarget() = default;
|
||||
GCNSubtarget::~GCNSubtarget() = default;
|
||||
|
||||
R600Subtarget &
|
||||
R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
@ -51,7 +53,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
// FIXME: I don't think Evergreen has any useful support for
|
||||
// denormals, but should be checked. Should we issue a warning somewhere
|
||||
// if someone tries to enable these?
|
||||
if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
|
||||
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
FP32Denormals = false;
|
||||
}
|
||||
|
||||
@ -61,8 +63,8 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
return *this;
|
||||
}
|
||||
|
||||
AMDGPUSubtarget &
|
||||
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
GCNSubtarget &
|
||||
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
StringRef GPU, StringRef FS) {
|
||||
// Determine default and user-specified characteristics
|
||||
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
|
||||
@ -122,7 +124,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
return *this;
|
||||
}
|
||||
|
||||
AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
|
||||
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
|
||||
const FeatureBitset &FeatureBits) :
|
||||
TargetTriple(TT),
|
||||
SubtargetFeatureBits(FeatureBits),
|
||||
@ -140,11 +142,10 @@ AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
|
||||
WavefrontSize(0)
|
||||
{ }
|
||||
|
||||
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const TargetMachine &TM) :
|
||||
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const GCNTargetMachine &TM) :
|
||||
AMDGPUGenSubtargetInfo(TT, GPU, FS),
|
||||
AMDGPUCommonSubtarget(TT, getFeatureBits()),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||
AMDGPUSubtarget(TT, getFeatureBits()),
|
||||
TargetTriple(TT),
|
||||
Gen(SOUTHERN_ISLANDS),
|
||||
IsaVersion(ISAVersion0_0_0),
|
||||
@ -206,12 +207,20 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
|
||||
ScalarizeGlobal(false),
|
||||
|
||||
FeatureDisable(false) {
|
||||
FeatureDisable(false),
|
||||
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
|
||||
InstrInfo(*this),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
|
||||
AS = AMDGPU::getAMDGPUAS(TT);
|
||||
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
|
||||
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
|
||||
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
|
||||
InstSelector.reset(new AMDGPUInstructionSelector(
|
||||
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
|
||||
initializeSubtargetDependencies(TT, GPU, FS);
|
||||
}
|
||||
|
||||
unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
|
||||
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
|
||||
const Function &F) const {
|
||||
if (NWaves == 1)
|
||||
return getLocalMemorySize();
|
||||
@ -221,7 +230,7 @@ unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
|
||||
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
|
||||
}
|
||||
|
||||
unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
|
||||
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
|
||||
const Function &F) const {
|
||||
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
|
||||
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
|
||||
@ -234,13 +243,13 @@ unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
|
||||
}
|
||||
|
||||
unsigned
|
||||
AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
|
||||
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
|
||||
const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
|
||||
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
|
||||
switch (CC) {
|
||||
case CallingConv::AMDGPU_CS:
|
||||
case CallingConv::AMDGPU_KERNEL:
|
||||
@ -258,7 +267,7 @@ AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
|
||||
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
|
||||
const Function &F) const {
|
||||
// FIXME: 1024 if function.
|
||||
// Default minimum/maximum flat work group sizes.
|
||||
@ -288,7 +297,7 @@ std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
|
||||
return Requested;
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
|
||||
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
|
||||
const Function &F) const {
|
||||
// Default minimum/maximum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
|
||||
@ -336,7 +345,7 @@ std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
|
||||
return Requested;
|
||||
}
|
||||
|
||||
bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
|
||||
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
|
||||
Function *Kernel = I->getParent()->getParent();
|
||||
unsigned MinSize = 0;
|
||||
unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
|
||||
@ -401,7 +410,7 @@ bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
|
||||
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const TargetMachine &TM) :
|
||||
R600GenSubtargetInfo(TT, GPU, FS),
|
||||
AMDGPUCommonSubtarget(TT, getFeatureBits()),
|
||||
AMDGPUSubtarget(TT, getFeatureBits()),
|
||||
InstrInfo(*this),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||
FMA(false),
|
||||
@ -417,20 +426,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
InstrItins(getInstrItineraryForCPU(GPU)),
|
||||
AS (AMDGPU::getAMDGPUAS(TT)) { }
|
||||
|
||||
SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const GCNTargetMachine &TM)
|
||||
: AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||
TLInfo(TM, *this) {
|
||||
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
|
||||
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
|
||||
|
||||
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
|
||||
InstSelector.reset(new AMDGPUInstructionSelector(
|
||||
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
|
||||
}
|
||||
|
||||
void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
unsigned NumRegionInstrs) const {
|
||||
// Track register pressure so the scheduler can try to decrease
|
||||
// pressure once register usage is above the threshold defined by
|
||||
@ -447,11 +443,11 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
Policy.ShouldTrackLaneMasks = true;
|
||||
}
|
||||
|
||||
bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
|
||||
bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
|
||||
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
|
||||
}
|
||||
|
||||
uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const {
|
||||
uint64_t GCNSubtarget::getExplicitKernArgSize(const Function &F) const {
|
||||
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL);
|
||||
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
@ -467,7 +463,7 @@ uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const {
|
||||
return ExplicitArgBytes;
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
|
||||
unsigned GCNSubtarget::getKernArgSegmentSize(const Function &F,
|
||||
int64_t ExplicitArgBytes) const {
|
||||
if (ExplicitArgBytes == -1)
|
||||
ExplicitArgBytes = getExplicitKernArgSize(F);
|
||||
@ -485,8 +481,8 @@ unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
|
||||
return alignTo(TotalSize, 4);
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
|
||||
if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
if (SGPRs <= 80)
|
||||
return 10;
|
||||
if (SGPRs <= 88)
|
||||
@ -508,7 +504,7 @@ unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
|
||||
return 5;
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
||||
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
||||
if (VGPRs <= 24)
|
||||
return 10;
|
||||
if (VGPRs <= 28)
|
||||
@ -530,7 +526,7 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (MFI.hasFlatScratchInit()) {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
@ -544,7 +540,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
return 2; // VCC.
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
@ -594,7 +590,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
MaxAddressableNumSGPRs);
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
@ -675,21 +671,21 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void SISubtarget::getPostRAMutations(
|
||||
void GCNSubtarget::getPostRAMutations(
|
||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
|
||||
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
|
||||
}
|
||||
|
||||
const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
|
||||
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
|
||||
if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
|
||||
return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
|
||||
return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
|
||||
else
|
||||
return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
|
||||
return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
|
||||
}
|
||||
|
||||
const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
|
||||
const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
|
||||
if (TM.getTargetTriple().getArch() == Triple::amdgcn)
|
||||
return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
|
||||
return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
|
||||
else
|
||||
return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
|
||||
return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
|
||||
}
|
||||
|
@ -46,7 +46,19 @@ namespace llvm {
|
||||
|
||||
class StringRef;
|
||||
|
||||
class AMDGPUCommonSubtarget {
|
||||
class AMDGPUSubtarget {
|
||||
public:
|
||||
enum Generation {
|
||||
R600 = 0,
|
||||
R700 = 1,
|
||||
EVERGREEN = 2,
|
||||
NORTHERN_ISLANDS = 3,
|
||||
SOUTHERN_ISLANDS = 4,
|
||||
SEA_ISLANDS = 5,
|
||||
VOLCANIC_ISLANDS = 6,
|
||||
GFX9 = 7
|
||||
};
|
||||
|
||||
private:
|
||||
Triple TargetTriple;
|
||||
|
||||
@ -66,10 +78,10 @@ protected:
|
||||
unsigned WavefrontSize;
|
||||
|
||||
public:
|
||||
AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
|
||||
AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
|
||||
|
||||
static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
|
||||
static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
|
||||
static const AMDGPUSubtarget &get(const MachineFunction &MF);
|
||||
static const AMDGPUSubtarget &get(const TargetMachine &TM,
|
||||
const Function &F);
|
||||
|
||||
/// \returns Default range flat work group size for a calling convention.
|
||||
@ -219,21 +231,12 @@ public:
|
||||
/// Creates value range metadata on an workitemid.* inrinsic call or load.
|
||||
bool makeLIDRangeMetadata(Instruction *I) const;
|
||||
|
||||
virtual ~AMDGPUCommonSubtarget() {}
|
||||
virtual ~AMDGPUSubtarget() {}
|
||||
};
|
||||
|
||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
|
||||
public AMDGPUCommonSubtarget {
|
||||
class GCNSubtarget : public AMDGPUGenSubtargetInfo,
|
||||
public AMDGPUSubtarget {
|
||||
public:
|
||||
enum Generation {
|
||||
// Gap for R600 generations, so we can do comparisons between
|
||||
// AMDGPUSubtarget and r600Subtarget.
|
||||
SOUTHERN_ISLANDS = 4,
|
||||
SEA_ISLANDS = 5,
|
||||
VOLCANIC_ISLANDS = 6,
|
||||
GFX9 = 7,
|
||||
};
|
||||
|
||||
enum {
|
||||
ISAVersion0_0_0,
|
||||
ISAVersion6_0_0,
|
||||
@ -274,8 +277,6 @@ public:
|
||||
};
|
||||
|
||||
private:
|
||||
SIFrameLowering FrameLowering;
|
||||
|
||||
/// GlobalISel related APIs.
|
||||
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
|
||||
std::unique_ptr<InstructionSelector> InstSelector;
|
||||
@ -360,24 +361,34 @@ protected:
|
||||
|
||||
SelectionDAGTargetInfo TSInfo;
|
||||
AMDGPUAS AS;
|
||||
private:
|
||||
SITargetLowering TLInfo;
|
||||
SIInstrInfo InstrInfo;
|
||||
SIFrameLowering FrameLowering;
|
||||
|
||||
public:
|
||||
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const TargetMachine &TM);
|
||||
~AMDGPUSubtarget() override;
|
||||
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const GCNTargetMachine &TM);
|
||||
~GCNSubtarget() override;
|
||||
|
||||
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
|
||||
GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
|
||||
StringRef GPU, StringRef FS);
|
||||
|
||||
virtual const SIInstrInfo *getInstrInfo() const override = 0;
|
||||
const SIInstrInfo *getInstrInfo() const override {
|
||||
return &InstrInfo;
|
||||
}
|
||||
|
||||
const SIFrameLowering *getFrameLowering() const override {
|
||||
return &FrameLowering;
|
||||
}
|
||||
|
||||
virtual const SITargetLowering *getTargetLowering() const override = 0;
|
||||
const SITargetLowering *getTargetLowering() const override {
|
||||
return &TLInfo;
|
||||
}
|
||||
|
||||
virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
|
||||
const SIRegisterInfo *getRegisterInfo() const override {
|
||||
return &InstrInfo.getRegisterInfo();
|
||||
}
|
||||
|
||||
const CallLowering *getCallLowering() const override {
|
||||
return CallLoweringInfo.get();
|
||||
@ -720,55 +731,7 @@ public:
|
||||
return AMDGPU::IsaInfo::getWavesPerWorkGroup(
|
||||
MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
|
||||
}
|
||||
};
|
||||
|
||||
class SISubtarget final : public AMDGPUSubtarget {
|
||||
private:
|
||||
SIInstrInfo InstrInfo;
|
||||
SIFrameLowering FrameLowering;
|
||||
SITargetLowering TLInfo;
|
||||
|
||||
/// GlobalISel related APIs.
|
||||
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
|
||||
std::unique_ptr<InstructionSelector> InstSelector;
|
||||
std::unique_ptr<LegalizerInfo> Legalizer;
|
||||
std::unique_ptr<RegisterBankInfo> RegBankInfo;
|
||||
|
||||
public:
|
||||
SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
|
||||
const GCNTargetMachine &TM);
|
||||
|
||||
const SIInstrInfo *getInstrInfo() const override {
|
||||
return &InstrInfo;
|
||||
}
|
||||
|
||||
const SIFrameLowering *getFrameLowering() const override {
|
||||
return &FrameLowering;
|
||||
}
|
||||
|
||||
const SITargetLowering *getTargetLowering() const override {
|
||||
return &TLInfo;
|
||||
}
|
||||
|
||||
const CallLowering *getCallLowering() const override {
|
||||
return CallLoweringInfo.get();
|
||||
}
|
||||
|
||||
const InstructionSelector *getInstructionSelector() const override {
|
||||
return InstSelector.get();
|
||||
}
|
||||
|
||||
const LegalizerInfo *getLegalizerInfo() const override {
|
||||
return Legalizer.get();
|
||||
}
|
||||
|
||||
const RegisterBankInfo *getRegBankInfo() const override {
|
||||
return RegBankInfo.get();
|
||||
}
|
||||
|
||||
const SIRegisterInfo *getRegisterInfo() const override {
|
||||
return &InstrInfo.getRegisterInfo();
|
||||
}
|
||||
// static wrappers
|
||||
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
|
||||
|
||||
@ -988,12 +951,8 @@ public:
|
||||
const override;
|
||||
};
|
||||
|
||||
|
||||
class R600Subtarget final : public R600GenSubtargetInfo,
|
||||
public AMDGPUCommonSubtarget {
|
||||
public:
|
||||
enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
|
||||
|
||||
public AMDGPUSubtarget {
|
||||
private:
|
||||
R600InstrInfo InstrInfo;
|
||||
R600FrameLowering FrameLowering;
|
||||
|
@ -466,7 +466,7 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
|
||||
CodeGenOpt::Level OL, bool JIT)
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
|
||||
|
||||
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
|
||||
const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
|
||||
StringRef GPU = getGPUName(F);
|
||||
StringRef FS = getFeatureString(F);
|
||||
|
||||
@ -479,7 +479,7 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
|
||||
// creation will depend on the TM and the code generation flags on the
|
||||
// function that reside in TargetOptions.
|
||||
resetTargetOptions(F);
|
||||
I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
|
||||
I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
|
||||
}
|
||||
|
||||
I->setScalarizeGlobalBehavior(ScalarizeGlobal);
|
||||
@ -750,7 +750,7 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
|
||||
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
|
||||
MachineSchedContext *C) const {
|
||||
const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
|
||||
if (ST.enableSIScheduler())
|
||||
return createSIMachineScheduler(C);
|
||||
return createGCNMaxOccupancyMachineScheduler(C);
|
||||
|
@ -100,7 +100,7 @@ public:
|
||||
class GCNTargetMachine final : public AMDGPUTargetMachine {
|
||||
private:
|
||||
AMDGPUIntrinsicInfo IntrinsicInfo;
|
||||
mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
|
||||
mutable StringMap<std::unique_ptr<GCNSubtarget>> SubtargetMap;
|
||||
|
||||
public:
|
||||
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
||||
@ -110,7 +110,7 @@ public:
|
||||
|
||||
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
|
||||
|
||||
const SISubtarget *getSubtargetImpl(const Function &) const override;
|
||||
const GCNSubtarget *getSubtargetImpl(const Function &) const override;
|
||||
|
||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||
|
||||
|
@ -62,7 +62,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
|
||||
friend BaseT;
|
||||
|
||||
const AMDGPUSubtarget *ST;
|
||||
const GCNSubtarget *ST;
|
||||
const AMDGPUTargetLowering *TLI;
|
||||
AMDGPUTTIImpl CommonTTI;
|
||||
bool IsGraphicsShader;
|
||||
@ -91,7 +91,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
AMDGPU::HalfRate64Ops
|
||||
};
|
||||
|
||||
const AMDGPUSubtarget *getST() const { return ST; }
|
||||
const GCNSubtarget *getST() const { return ST; }
|
||||
const AMDGPUTargetLowering *getTLI() const { return TLI; }
|
||||
|
||||
static inline int getFullRateInstrCost() {
|
||||
@ -118,7 +118,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
public:
|
||||
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()),
|
||||
ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
|
||||
ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
|
||||
TLI(ST->getTargetLowering()),
|
||||
CommonTTI(TM, F),
|
||||
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
|
||||
|
@ -14,13 +14,13 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def isEG : Predicate<
|
||||
"Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
|
||||
"!Subtarget->hasCaymanISA()"
|
||||
>;
|
||||
|
||||
def isEGorCayman : Predicate<
|
||||
"Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
|
||||
"Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
|
||||
>;
|
||||
|
||||
class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
|
||||
|
@ -40,7 +40,7 @@ using namespace llvm;
|
||||
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
|
||||
CurrCycleInstr(nullptr),
|
||||
MF(MF),
|
||||
ST(MF.getSubtarget<SISubtarget>()),
|
||||
ST(MF.getSubtarget<GCNSubtarget>()),
|
||||
TII(*ST.getInstrInfo()),
|
||||
TRI(TII.getRegisterInfo()),
|
||||
ClauseUses(TRI.getNumRegUnits()),
|
||||
@ -356,13 +356,13 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
|
||||
}
|
||||
|
||||
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
int WaitStatesNeeded = 0;
|
||||
|
||||
WaitStatesNeeded = checkSoftClauseHazards(SMRD);
|
||||
|
||||
// This SMRD hazard only affects SI.
|
||||
if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
|
||||
if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return WaitStatesNeeded;
|
||||
|
||||
// A read of an SGPR by SMRD instruction requires 4 wait states when the
|
||||
@ -399,7 +399,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
|
||||
}
|
||||
|
||||
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
|
||||
if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 0;
|
||||
|
||||
int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
|
||||
|
@ -28,7 +28,7 @@ class MachineRegisterInfo;
|
||||
class ScheduleDAG;
|
||||
class SIInstrInfo;
|
||||
class SIRegisterInfo;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
// This variable stores the instruction that has been emitted this cycle. It
|
||||
@ -37,7 +37,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
MachineInstr *CurrCycleInstr;
|
||||
std::list<MachineInstr*> EmittedInstrs;
|
||||
const MachineFunction &MF;
|
||||
const SISubtarget &ST;
|
||||
const GCNSubtarget &ST;
|
||||
const SIInstrInfo &TII;
|
||||
const SIRegisterInfo &TRI;
|
||||
|
||||
|
@ -108,7 +108,7 @@ static void printLivenessInfo(raw_ostream &OS,
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
for (const auto R : Regions) {
|
||||
OS << "Region to schedule ";
|
||||
printRegion(OS, R->Begin, R->End, LIS, 1);
|
||||
@ -132,7 +132,7 @@ LLVM_DUMP_METHOD
|
||||
void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
|
||||
const GCNRegPressure &Before,
|
||||
const GCNRegPressure &After) const {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
OS << "RP before: ";
|
||||
Before.print(OS, &ST);
|
||||
OS << "RP after: ";
|
||||
@ -316,7 +316,7 @@ void GCNIterativeScheduler::schedule() { // overriden
|
||||
if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
|
||||
dbgs() << "Max RP: ";
|
||||
Regions.back()->MaxPressure.print(
|
||||
dbgs(), &MF.getSubtarget<SISubtarget>());
|
||||
dbgs(), &MF.getSubtarget<GCNSubtarget>());
|
||||
} dbgs()
|
||||
<< '\n';);
|
||||
}
|
||||
@ -418,7 +418,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
|
||||
|
||||
#ifndef NDEBUG
|
||||
const auto RegionMaxRP = getRegionPressure(R);
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
#endif
|
||||
assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
|
||||
|| (dbgs() << "Max RP mismatch!!!\n"
|
||||
@ -433,7 +433,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
|
||||
|
||||
// Sort recorded regions by pressure - highest at the front
|
||||
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
llvm::sort(Regions.begin(), Regions.end(),
|
||||
[&ST, TargetOcc](const Region *R1, const Region *R2) {
|
||||
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
|
||||
@ -451,7 +451,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
|
||||
// BestSchedules aren't deleted on fail.
|
||||
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
|
||||
// TODO: assert Regions are sorted descending by pressure
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
|
||||
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
|
||||
<< ", current = " << Occ << '\n');
|
||||
@ -488,7 +488,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
|
||||
|
||||
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
|
||||
bool TryMaximizeOccupancy) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
auto TgtOcc = MFI->getMinAllowedOccupancy();
|
||||
|
||||
@ -542,7 +542,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
|
||||
// Minimal Register Strategy
|
||||
|
||||
void GCNIterativeScheduler::scheduleMinReg(bool force) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const auto TgtOcc = MFI->getOccupancy();
|
||||
sortRegionsByPressure(TgtOcc);
|
||||
@ -576,7 +576,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
|
||||
|
||||
void GCNIterativeScheduler::scheduleILP(
|
||||
bool TryMaximizeOccupancy) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
auto TgtOcc = MFI->getMinAllowedOccupancy();
|
||||
|
||||
|
@ -132,7 +132,7 @@ void GCNRegPressure::inc(unsigned Reg,
|
||||
}
|
||||
}
|
||||
|
||||
bool GCNRegPressure::less(const SISubtarget &ST,
|
||||
bool GCNRegPressure::less(const GCNSubtarget &ST,
|
||||
const GCNRegPressure& O,
|
||||
unsigned MaxOccupancy) const {
|
||||
const auto SGPROcc = std::min(MaxOccupancy,
|
||||
@ -178,7 +178,7 @@ bool GCNRegPressure::less(const SISubtarget &ST,
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
|
||||
void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
|
||||
OS << "VGPRs: " << getVGPRNum();
|
||||
if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')';
|
||||
OS << ", SGPRs: " << getSGPRNum();
|
||||
|
@ -49,7 +49,7 @@ struct GCNRegPressure {
|
||||
unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
|
||||
unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
|
||||
|
||||
unsigned getOccupancy(const SISubtarget &ST) const {
|
||||
unsigned getOccupancy(const GCNSubtarget &ST) const {
|
||||
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
|
||||
ST.getOccupancyWithNumVGPRs(getVGPRNum()));
|
||||
}
|
||||
@ -59,11 +59,11 @@ struct GCNRegPressure {
|
||||
LaneBitmask NewMask,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
|
||||
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
|
||||
return getOccupancy(ST) > O.getOccupancy(ST);
|
||||
}
|
||||
|
||||
bool less(const SISubtarget &ST, const GCNRegPressure& O,
|
||||
bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
|
||||
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
|
||||
|
||||
bool operator==(const GCNRegPressure &O) const {
|
||||
@ -74,7 +74,7 @@ struct GCNRegPressure {
|
||||
return !(*this == O);
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const SISubtarget *ST = nullptr) const;
|
||||
void print(raw_ostream &OS, const GCNSubtarget *ST = nullptr) const;
|
||||
void dump() const { print(dbgs()); }
|
||||
|
||||
private:
|
||||
|
@ -35,7 +35,7 @@ void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
|
||||
|
||||
MF = &DAG->MF;
|
||||
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
|
||||
// FIXME: This is also necessary, because some passes that run after
|
||||
// scheduling and before regalloc increase register pressure.
|
||||
@ -294,7 +294,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
|
||||
GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
|
||||
std::unique_ptr<MachineSchedStrategy> S) :
|
||||
ScheduleDAGMILive(C, std::move(S)),
|
||||
ST(MF.getSubtarget<SISubtarget>()),
|
||||
ST(MF.getSubtarget<GCNSubtarget>()),
|
||||
MFI(*MF.getInfo<SIMachineFunctionInfo>()),
|
||||
StartingOccupancy(MFI.getOccupancy()),
|
||||
MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
|
||||
|
@ -21,7 +21,7 @@ namespace llvm {
|
||||
|
||||
class SIMachineFunctionInfo;
|
||||
class SIRegisterInfo;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
/// This is a minimal scheduler strategy. The main difference between this
|
||||
/// and the GenericScheduler is that GCNSchedStrategy uses different
|
||||
@ -62,7 +62,7 @@ public:
|
||||
|
||||
class GCNScheduleDAGMILive : public ScheduleDAGMILive {
|
||||
|
||||
const SISubtarget &ST;
|
||||
const GCNSubtarget &ST;
|
||||
|
||||
SIMachineFunctionInfo &MFI;
|
||||
|
||||
|
@ -69,7 +69,7 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
|
||||
}
|
||||
|
||||
unsigned RsrcReg;
|
||||
if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
|
||||
// Evergreen / Northern Islands
|
||||
switch (MF.getFunction().getCallingConv()) {
|
||||
default: LLVM_FALLTHROUGH;
|
||||
|
@ -137,7 +137,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
|
||||
return 0;
|
||||
case CFStack::FIRST_NON_WQM_PUSH:
|
||||
assert(!ST->hasCaymanISA());
|
||||
if (ST->getGeneration() <= R600Subtarget::R700) {
|
||||
if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
|
||||
// +1 For the push operation.
|
||||
// +2 Extra space required.
|
||||
return 3;
|
||||
@ -150,7 +150,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
|
||||
return 2;
|
||||
}
|
||||
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
|
||||
assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
|
||||
assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
|
||||
// +1 For the push operation.
|
||||
// +1 Extra space required.
|
||||
return 2;
|
||||
@ -177,7 +177,7 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
|
||||
// See comment in
|
||||
// CFStack::getSubEntrySize()
|
||||
else if (CurrentEntries > 0 &&
|
||||
ST->getGeneration() > R600Subtarget::EVERGREEN &&
|
||||
ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
|
||||
!ST->hasCaymanISA() &&
|
||||
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
|
||||
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
|
||||
@ -250,7 +250,7 @@ private:
|
||||
|
||||
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
|
||||
unsigned Opcode = 0;
|
||||
bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
|
||||
bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
|
||||
switch (CFI) {
|
||||
case CF_TC:
|
||||
Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
|
||||
|
@ -791,7 +791,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
|
||||
DAG.getNode(ISD::FADD, DL, VT, FractPart,
|
||||
DAG.getConstantFP(-0.5, DL, MVT::f32)));
|
||||
if (Gen >= R600Subtarget::R700)
|
||||
if (Gen >= AMDGPUSubtarget::R700)
|
||||
return TrigVal;
|
||||
// On R600 hw, COS/SIN input must be between -Pi and Pi.
|
||||
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
|
||||
|
@ -11,10 +11,10 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">;
|
||||
def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
|
||||
|
||||
def isR600toCayman : Predicate<
|
||||
"Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">;
|
||||
"Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
|
||||
|
||||
class R600Pat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
|
||||
let SubtargetPredicate = isR600toCayman;
|
||||
|
@ -1320,7 +1320,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
|
||||
const {
|
||||
assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");
|
||||
unsigned Opcode;
|
||||
if (ST.getGeneration() <= R600Subtarget::R700)
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::R700)
|
||||
Opcode = R600::DOT4_r600;
|
||||
else
|
||||
Opcode = R600::DOT4_eg;
|
||||
|
@ -346,7 +346,7 @@ void R600SchedStrategy::PrepareNextSlot() {
|
||||
LLVM_DEBUG(dbgs() << "New Slot\n");
|
||||
assert (OccupedSlotsMask && "Slot wasn't filled");
|
||||
OccupedSlotsMask = 0;
|
||||
// if (HwGen == R600Subtarget::NORTHERN_ISLANDS)
|
||||
// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
// OccupedSlotsMask |= 16;
|
||||
InstructionsGroupCandidate.clear();
|
||||
LoadAlu();
|
||||
|
@ -20,8 +20,6 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
|
||||
struct R600RegisterInfo final : public R600GenRegisterInfo {
|
||||
RegClassWeight RCW;
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;
|
||||
def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
|
||||
|
||||
let Predicates = [isR700] in {
|
||||
def SIN_r700 : SIN_Common<0x6E>;
|
||||
|
@ -63,7 +63,7 @@ FunctionPass *llvm::createSIDebuggerInsertNopsPass() {
|
||||
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
|
||||
// Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
|
||||
// specified.
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
if (!ST.debuggerInsertNops())
|
||||
return false;
|
||||
|
||||
|
@ -568,7 +568,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
|
||||
}
|
||||
|
||||
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
@ -47,7 +47,7 @@ char SIFixVGPRCopies::ID = 0;
|
||||
char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
|
||||
|
||||
bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
bool Changed = false;
|
||||
|
@ -185,7 +185,7 @@ bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
|
||||
// This doesn't actually need LiveIntervals, but we can preserve them.
|
||||
LIS = getAnalysisIfAvailable<LiveIntervals>();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
TRI = &TII->getRegisterInfo();
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
MachineRegisterInfo *MRI;
|
||||
const SIInstrInfo *TII;
|
||||
const SIRegisterInfo *TRI;
|
||||
const AMDGPUSubtarget *ST;
|
||||
const GCNSubtarget *ST;
|
||||
|
||||
void foldOperand(MachineOperand &OpToFold,
|
||||
MachineInstr *UseMI,
|
||||
@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
|
||||
MRI = &MF.getRegInfo();
|
||||
ST = &MF.getSubtarget<AMDGPUSubtarget>();
|
||||
ST = &MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST->getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
|
||||
|
@ -70,7 +70,7 @@ private:
|
||||
bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
|
||||
GCNDownwardRPTracker &RPT);
|
||||
|
||||
const SISubtarget *ST;
|
||||
const GCNSubtarget *ST;
|
||||
const SIRegisterInfo *TRI;
|
||||
const MachineRegisterInfo *MRI;
|
||||
SIMachineFunctionInfo *MFI;
|
||||
@ -296,7 +296,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
ST = &MF.getSubtarget<SISubtarget>();
|
||||
ST = &MF.getSubtarget<GCNSubtarget>();
|
||||
if (!ST->isXNACKEnabled())
|
||||
return false;
|
||||
|
||||
|
@ -23,19 +23,19 @@
|
||||
using namespace llvm;
|
||||
|
||||
|
||||
static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
|
||||
static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
|
||||
const MachineFunction &MF) {
|
||||
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
|
||||
ST.getMaxNumSGPRs(MF) / 4);
|
||||
}
|
||||
|
||||
static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
|
||||
static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
|
||||
const MachineFunction &MF) {
|
||||
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
|
||||
ST.getMaxNumSGPRs(MF));
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
|
||||
void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
@ -98,7 +98,7 @@ void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
|
||||
}
|
||||
|
||||
unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
|
||||
const SISubtarget &ST,
|
||||
const GCNSubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
@ -149,7 +149,7 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
|
||||
// SGPRs.
|
||||
std::pair<unsigned, unsigned>
|
||||
SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const SISubtarget &ST,
|
||||
const GCNSubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
@ -220,7 +220,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
|
||||
// specified.
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
if (ST.debuggerEmitPrologue())
|
||||
emitDebuggerPrologue(MF, MBB);
|
||||
|
||||
@ -364,7 +364,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
|
||||
void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
|
||||
void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
|
||||
MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
|
||||
MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
|
||||
unsigned ScratchRsrcReg) const {
|
||||
@ -508,7 +508,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
|
||||
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
|
||||
const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
|
||||
LivePhysRegs LiveRegs(TRI);
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
@ -537,7 +537,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
|
||||
@ -607,7 +607,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
if (FuncInfo->isEntryFunction())
|
||||
return;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
|
||||
@ -654,7 +654,7 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
|
||||
|
||||
int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const {
|
||||
const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
|
||||
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
||||
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
return MF.getFrameInfo().getObjectOffset(FI);
|
||||
@ -668,7 +668,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||
if (!MFI.hasStackObjects())
|
||||
return;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
||||
@ -750,7 +750,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
||||
if (Amount == 0)
|
||||
return MBB.erase(I);
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const DebugLoc &DL = I->getDebugLoc();
|
||||
unsigned Opc = I->getOpcode();
|
||||
@ -779,7 +779,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
||||
|
||||
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
@ -829,7 +829,7 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
}
|
||||
|
||||
bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
|
||||
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
|
||||
const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
||||
// All stack operations are relative to the frame offset SGPR.
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
|
||||
|
@ -17,7 +17,7 @@ namespace llvm {
|
||||
class SIInstrInfo;
|
||||
class SIMachineFunctionInfo;
|
||||
class SIRegisterInfo;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
|
||||
class SIFrameLowering final : public AMDGPUFrameLowering {
|
||||
public:
|
||||
@ -48,19 +48,19 @@ public:
|
||||
MachineBasicBlock::iterator MI) const override;
|
||||
|
||||
private:
|
||||
void emitFlatScratchInit(const SISubtarget &ST,
|
||||
void emitFlatScratchInit(const GCNSubtarget &ST,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const;
|
||||
|
||||
unsigned getReservedPrivateSegmentBufferReg(
|
||||
const SISubtarget &ST,
|
||||
const GCNSubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
|
||||
const SISubtarget &ST,
|
||||
const GCNSubtarget &ST,
|
||||
const SIInstrInfo *TII,
|
||||
const SIRegisterInfo *TRI,
|
||||
SIMachineFunctionInfo *MFI,
|
||||
@ -70,7 +70,7 @@ private:
|
||||
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
||||
// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
|
||||
void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF,
|
||||
void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
|
||||
MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
|
||||
MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
|
||||
unsigned ScratchRsrcReg) const;
|
||||
|
@ -112,7 +112,7 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) {
|
||||
}
|
||||
|
||||
SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
const SISubtarget &STI)
|
||||
const GCNSubtarget &STI)
|
||||
: AMDGPUTargetLowering(TM, STI),
|
||||
Subtarget(&STI) {
|
||||
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
|
||||
@ -378,7 +378,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
|
||||
|
||||
if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) {
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f64, Legal);
|
||||
@ -667,7 +667,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
|
||||
}
|
||||
|
||||
const SISubtarget *SITargetLowering::getSubtarget() const {
|
||||
const GCNSubtarget *SITargetLowering::getSubtarget() const {
|
||||
return Subtarget;
|
||||
}
|
||||
|
||||
@ -708,12 +708,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
|
||||
if (RsrcIntr->IsImage) {
|
||||
Info.ptrVal = MFI->getImagePSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(RsrcIntr->RsrcArg));
|
||||
Info.align = 0;
|
||||
} else {
|
||||
Info.ptrVal = MFI->getBufferPSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
*MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(RsrcIntr->RsrcArg));
|
||||
}
|
||||
|
||||
@ -877,16 +877,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4)
|
||||
return isLegalGlobalAddressingMode(AM);
|
||||
|
||||
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
// SMRD instructions have an 8-bit, dword offset on SI.
|
||||
if (!isUInt<8>(AM.BaseOffs / 4))
|
||||
return false;
|
||||
} else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) {
|
||||
} else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
// On CI+, this can also be a 32-bit literal constant offset. If it fits
|
||||
// in 8-bits, it can use a smaller encoding.
|
||||
if (!isUInt<32>(AM.BaseOffs / 4))
|
||||
return false;
|
||||
} else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
} else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
|
||||
if (!isUInt<20>(AM.BaseOffs))
|
||||
return false;
|
||||
@ -1560,7 +1560,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
|
||||
// the scratch registers to pass in.
|
||||
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
if (ST.isAmdCodeObjectV2(MF.getFunction())) {
|
||||
if (RequiresStackAccess) {
|
||||
// If we have stack objects, we unquestionably need the private buffer
|
||||
@ -1676,7 +1676,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
const Function &Fn = MF.getFunction();
|
||||
FunctionType *FType = MF.getFunction().getFunctionType();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
|
||||
DiagnosticInfoUnsupported NoGraphicsHSA(
|
||||
@ -1808,7 +1808,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
|
||||
auto *ParamTy =
|
||||
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
|
||||
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
|
||||
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
// On SI local pointers are just offsets into LDS, so they are always
|
||||
// less than 16-bits. On CI and newer they could potentially be
|
||||
@ -2668,7 +2668,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
|
||||
|
||||
}
|
||||
|
||||
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
|
||||
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
|
||||
report_fatal_error(Twine("invalid register \""
|
||||
+ StringRef(RegName) + "\" for subtarget."));
|
||||
@ -2959,7 +2959,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
|
||||
// Control flow needs to be inserted if indexing with a VGPR.
|
||||
static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
|
||||
MachineBasicBlock &MBB,
|
||||
const SISubtarget &ST) {
|
||||
const GCNSubtarget &ST) {
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
@ -3050,7 +3050,7 @@ static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI,
|
||||
|
||||
static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
|
||||
MachineBasicBlock &MBB,
|
||||
const SISubtarget &ST) {
|
||||
const GCNSubtarget &ST) {
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
@ -3964,7 +3964,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc SL(Op);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
|
||||
if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
|
||||
if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
|
||||
!Subtarget->isTrapHandlerEnabled())
|
||||
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
|
||||
|
||||
@ -3979,7 +3979,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
QueuePtr, SDValue());
|
||||
SDValue Ops[] = {
|
||||
ToReg,
|
||||
DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16),
|
||||
DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16),
|
||||
SGPR01,
|
||||
ToReg.getValue(1)
|
||||
};
|
||||
@ -3991,7 +3991,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
|
||||
if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
|
||||
!Subtarget->isTrapHandlerEnabled()) {
|
||||
DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
|
||||
"debugtrap handler not supported",
|
||||
@ -4004,7 +4004,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
|
||||
DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
|
||||
}
|
||||
@ -4513,7 +4513,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
|
||||
MVT StoreVT = VData.getSimpleValueType();
|
||||
if (StoreVT.getScalarType() == MVT::f16) {
|
||||
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
|
||||
!BaseOpcode->HasD16)
|
||||
return Op; // D16 is unsupported for this instruction
|
||||
|
||||
@ -4526,7 +4526,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
} else {
|
||||
MVT LoadVT = Op.getSimpleValueType();
|
||||
if (LoadVT.getScalarType() == MVT::f16) {
|
||||
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
|
||||
!BaseOpcode->HasD16)
|
||||
return Op; // D16 is unsupported for this instruction
|
||||
|
||||
@ -4620,7 +4620,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
|
||||
int Opcode = -1;
|
||||
|
||||
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
|
||||
NumVDataDwords, NumVAddrDwords);
|
||||
if (Opcode == -1)
|
||||
@ -4699,16 +4699,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_rsq:
|
||||
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
|
||||
case Intrinsic::amdgcn_rsq_legacy:
|
||||
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return emitRemovedIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
|
||||
case Intrinsic::amdgcn_rcp_legacy:
|
||||
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return emitRemovedIntrinsicError(DAG, DL, VT);
|
||||
return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
|
||||
case Intrinsic::amdgcn_rsq_clamp: {
|
||||
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
|
||||
|
||||
Type *Type = VT.getTypeForEVT(*DAG.getContext());
|
||||
@ -4845,7 +4845,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
|
||||
|
||||
case Intrinsic::amdgcn_log_clamp: {
|
||||
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return SDValue();
|
||||
|
||||
DiagnosticInfoUnsupported BadIntrin(
|
||||
@ -5278,7 +5278,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
}
|
||||
case Intrinsic::amdgcn_s_barrier: {
|
||||
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
|
||||
if (WGSize <= ST.getWavefrontSize())
|
||||
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
|
||||
@ -5889,7 +5889,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue Scale;
|
||||
|
||||
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
// Workaround a hardware bug on SI where the condition output from div_scale
|
||||
// is not usable.
|
||||
|
||||
@ -6709,7 +6709,7 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
|
||||
}
|
||||
|
||||
static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
|
||||
const SISubtarget *ST, unsigned MaxDepth=5) {
|
||||
const GCNSubtarget *ST, unsigned MaxDepth=5) {
|
||||
// If source is a result of another standard FP operation it is already in
|
||||
// canonical form.
|
||||
|
||||
@ -8296,7 +8296,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
|
||||
if (R)
|
||||
{
|
||||
const MachineFunction * MF = FLI->MF;
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
|
||||
unsigned Reg = R->getReg();
|
||||
|
@ -23,7 +23,7 @@ namespace llvm {
|
||||
|
||||
class SITargetLowering final : public AMDGPUTargetLowering {
|
||||
private:
|
||||
const SISubtarget *Subtarget;
|
||||
const GCNSubtarget *Subtarget;
|
||||
|
||||
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
|
||||
SDValue Chain, uint64_t Offset) const;
|
||||
@ -162,9 +162,9 @@ private:
|
||||
bool shouldEmitPCReloc(const GlobalValue *GV) const;
|
||||
|
||||
public:
|
||||
SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
|
||||
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
|
||||
|
||||
const SISubtarget *getSubtarget() const;
|
||||
const GCNSubtarget *getSubtarget() const;
|
||||
|
||||
bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;
|
||||
|
||||
|
@ -339,7 +339,7 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
|
||||
}
|
||||
|
||||
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
SkipThreshold = SkipThresholdFlag;
|
||||
|
@ -136,7 +136,7 @@ enum RegisterMapping {
|
||||
// "s_waitcnt 0" before use.
|
||||
class BlockWaitcntBrackets {
|
||||
public:
|
||||
BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
|
||||
BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
|
||||
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
||||
T = (enum InstCounterType)(T + 1)) {
|
||||
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
|
||||
@ -314,7 +314,7 @@ public:
|
||||
void dump() { print(dbgs()); }
|
||||
|
||||
private:
|
||||
const SISubtarget *ST = nullptr;
|
||||
const GCNSubtarget *ST = nullptr;
|
||||
bool WaitAtBeginning = false;
|
||||
bool RevisitLoop = false;
|
||||
bool MixedExpTypes = false;
|
||||
@ -364,7 +364,7 @@ private:
|
||||
|
||||
class SIInsertWaitcnts : public MachineFunctionPass {
|
||||
private:
|
||||
const SISubtarget *ST = nullptr;
|
||||
const GCNSubtarget *ST = nullptr;
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
const SIRegisterInfo *TRI = nullptr;
|
||||
const MachineRegisterInfo *MRI = nullptr;
|
||||
@ -1837,7 +1837,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
|
||||
}
|
||||
|
||||
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
||||
ST = &MF.getSubtarget<SISubtarget>();
|
||||
ST = &MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST->getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
|
@ -12,10 +12,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def isGCN : Predicate<"Subtarget->getGeneration() "
|
||||
">= SISubtarget::SOUTHERN_ISLANDS">,
|
||||
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureGCN">;
|
||||
def isSI : Predicate<"Subtarget->getGeneration() "
|
||||
"== SISubtarget::SOUTHERN_ISLANDS">,
|
||||
"== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureSouthernIslands">;
|
||||
|
||||
|
||||
|
@ -84,7 +84,7 @@ static cl::opt<unsigned>
|
||||
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
|
||||
cl::desc("Restrict range of branch instructions (DEBUG)"));
|
||||
|
||||
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
|
||||
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
|
||||
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
|
||||
RI(ST), ST(ST) {}
|
||||
|
||||
@ -1035,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
|
||||
unsigned FrameOffset, unsigned Size) const {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
|
||||
unsigned WavefrontSize = ST.getWavefrontSize();
|
||||
@ -2915,7 +2915,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
||||
}
|
||||
}
|
||||
|
||||
if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
|
||||
if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
|
||||
const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
|
||||
if (Offset->getImm() != 0) {
|
||||
ErrInfo = "subtarget does not support offsets in flat instructions";
|
||||
@ -3666,8 +3666,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
|
||||
} else {
|
||||
// This instructions is the _OFFSET variant, so we need to convert it to
|
||||
// ADDR64.
|
||||
assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
|
||||
< SISubtarget::VOLCANIC_ISLANDS &&
|
||||
assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
|
||||
< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||
"FIXME: Need to emit flat atomics here");
|
||||
|
||||
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
|
||||
@ -3803,37 +3803,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
continue;
|
||||
|
||||
case AMDGPU::S_LSHL_B32:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_ASHR_I32:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHR_B32:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHL_B64:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHLREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_ASHR_I64:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_ASHRREV_I64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHR_B64:
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHRREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
@ -4633,12 +4633,12 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
|
||||
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
|
||||
if (ST.isAmdHsaOS()) {
|
||||
// Set ATC = 1. GFX9 doesn't have this bit.
|
||||
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
RsrcDataFormat |= (1ULL << 56);
|
||||
|
||||
// Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
|
||||
// BTW, it disables TC L2 and therefore decreases performance.
|
||||
if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
RsrcDataFormat |= (2ULL << 59);
|
||||
}
|
||||
|
||||
@ -4651,7 +4651,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
|
||||
0xffffffff; // Size;
|
||||
|
||||
// GFX9 doesn't have ELEMENT_SIZE.
|
||||
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
|
||||
Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
|
||||
}
|
||||
@ -4661,7 +4661,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
|
||||
|
||||
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
|
||||
// Clear them unless we want a huge stride.
|
||||
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
|
||||
|
||||
return Rsrc23;
|
||||
@ -4996,13 +4996,15 @@ enum SIEncodingFamily {
|
||||
GFX9 = 5
|
||||
};
|
||||
|
||||
static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) {
|
||||
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
|
||||
switch (ST.getGeneration()) {
|
||||
case SISubtarget::SOUTHERN_ISLANDS:
|
||||
case SISubtarget::SEA_ISLANDS:
|
||||
default:
|
||||
break;
|
||||
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
|
||||
case AMDGPUSubtarget::SEA_ISLANDS:
|
||||
return SIEncodingFamily::SI;
|
||||
case SISubtarget::VOLCANIC_ISLANDS:
|
||||
case SISubtarget::GFX9:
|
||||
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
|
||||
case AMDGPUSubtarget::GFX9:
|
||||
return SIEncodingFamily::VI;
|
||||
}
|
||||
llvm_unreachable("Unknown subtarget generation!");
|
||||
@ -5012,11 +5014,11 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
|
||||
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
|
||||
|
||||
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
|
||||
ST.getGeneration() >= SISubtarget::GFX9)
|
||||
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
|
||||
Gen = SIEncodingFamily::GFX9;
|
||||
|
||||
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
|
||||
Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9
|
||||
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
|
||||
: SIEncodingFamily::SDWA;
|
||||
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
|
||||
// subtarget has UnpackedD16VMem feature.
|
||||
|
@ -39,13 +39,13 @@ namespace llvm {
|
||||
class APInt;
|
||||
class MachineRegisterInfo;
|
||||
class RegScavenger;
|
||||
class SISubtarget;
|
||||
class GCNSubtarget;
|
||||
class TargetRegisterClass;
|
||||
|
||||
class SIInstrInfo final : public AMDGPUGenInstrInfo {
|
||||
private:
|
||||
const SIRegisterInfo RI;
|
||||
const SISubtarget &ST;
|
||||
const GCNSubtarget &ST;
|
||||
|
||||
// The inverse predicate should have the negative value.
|
||||
enum BranchPredicate {
|
||||
@ -147,7 +147,7 @@ public:
|
||||
MO_REL32_HI = 5
|
||||
};
|
||||
|
||||
explicit SIInstrInfo(const SISubtarget &ST);
|
||||
explicit SIInstrInfo(const GCNSubtarget &ST);
|
||||
|
||||
const SIRegisterInfo &getRegisterInfo() const {
|
||||
return RI;
|
||||
|
@ -7,12 +7,12 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
def isCI : Predicate<"Subtarget->getGeneration() "
|
||||
">= SISubtarget::SEA_ISLANDS">;
|
||||
">= AMDGPUSubtarget::SEA_ISLANDS">;
|
||||
def isCIOnly : Predicate<"Subtarget->getGeneration() =="
|
||||
"SISubtarget::SEA_ISLANDS">,
|
||||
"AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate <"FeatureSeaIslands">;
|
||||
def isVIOnly : Predicate<"Subtarget->getGeneration() =="
|
||||
"SISubtarget::VOLCANIC_ISLANDS">,
|
||||
"AMDGPUSubtarget::VOLCANIC_ISLANDS">,
|
||||
AssemblerPredicate <"FeatureVolcanicIslands">;
|
||||
|
||||
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
|
||||
@ -487,7 +487,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
|
||||
}]>;
|
||||
|
||||
class VGPRImm <dag frag> : PatLeaf<frag, [{
|
||||
if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
return false;
|
||||
}
|
||||
const SIRegisterInfo *SIRI =
|
||||
|
@ -103,7 +103,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
|
||||
};
|
||||
|
||||
private:
|
||||
const SISubtarget *STM = nullptr;
|
||||
const GCNSubtarget *STM = nullptr;
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
const SIRegisterInfo *TRI = nullptr;
|
||||
MachineRegisterInfo *MRI = nullptr;
|
||||
@ -939,7 +939,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
STM = &MF.getSubtarget<SISubtarget>();
|
||||
STM = &MF.getSubtarget<GCNSubtarget>();
|
||||
if (!STM->loadStoreOptEnabled())
|
||||
return false;
|
||||
|
||||
|
@ -486,7 +486,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
|
||||
}
|
||||
|
||||
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
|
||||
|
@ -66,7 +66,7 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
|
||||
|
||||
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
|
||||
|
@ -47,7 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
ImplicitArgPtr(false),
|
||||
GITPtrHigh(0xffffffff),
|
||||
HighBitsOf32BitAddress(0) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const Function &F = MF.getFunction();
|
||||
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
|
||||
WavesPerEU = ST.getWavesPerEU(F);
|
||||
@ -178,7 +178,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
|
||||
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
|
||||
limitOccupancy(getMaxWavesPerEU());
|
||||
const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
|
||||
limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
|
||||
MF.getFunction()));
|
||||
}
|
||||
@ -253,7 +253,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
||||
if (!SpillLanes.empty())
|
||||
return true;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
@ -257,12 +257,12 @@ protected:
|
||||
|
||||
IsaInfo::IsaVersion IV;
|
||||
|
||||
SICacheControl(const SISubtarget &ST);
|
||||
SICacheControl(const GCNSubtarget &ST);
|
||||
|
||||
public:
|
||||
|
||||
/// Create a cache control for the subtarget \p ST.
|
||||
static std::unique_ptr<SICacheControl> create(const SISubtarget &ST);
|
||||
static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
|
||||
|
||||
/// Update \p MI memory load instruction to bypass any caches up to
|
||||
/// the \p Scope memory scope for address spaces \p
|
||||
@ -322,7 +322,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
SIGfx6CacheControl(const SISubtarget &ST) : SICacheControl(ST) {};
|
||||
SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
|
||||
|
||||
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
@ -346,7 +346,7 @@ public:
|
||||
class SIGfx7CacheControl : public SIGfx6CacheControl {
|
||||
public:
|
||||
|
||||
SIGfx7CacheControl(const SISubtarget &ST) : SIGfx6CacheControl(ST) {};
|
||||
SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
|
||||
|
||||
bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
@ -606,14 +606,14 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
|
||||
return constructFromMIWithMMO(MI);
|
||||
}
|
||||
|
||||
SICacheControl::SICacheControl(const SISubtarget &ST) {
|
||||
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
|
||||
TII = ST.getInstrInfo();
|
||||
IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
|
||||
}
|
||||
|
||||
/* static */
|
||||
std::unique_ptr<SICacheControl> SICacheControl::create(const SISubtarget &ST) {
|
||||
AMDGPUSubtarget::Generation Generation = ST.getGeneration();
|
||||
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
|
||||
GCNSubtarget::Generation Generation = ST.getGeneration();
|
||||
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return make_unique<SIGfx6CacheControl>(ST);
|
||||
return make_unique<SIGfx7CacheControl>(ST);
|
||||
@ -1012,7 +1012,7 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool Changed = false;
|
||||
|
||||
SIMemOpAccess MOA(MF);
|
||||
CC = SICacheControl::create(MF.getSubtarget<SISubtarget>());
|
||||
CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
|
||||
|
||||
for (auto &MBB : MF) {
|
||||
for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
|
||||
|
@ -209,7 +209,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
|
@ -107,7 +107,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
@ -90,9 +90,9 @@ public:
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
void matchSDWAOperands(MachineBasicBlock &MBB);
|
||||
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
|
||||
bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
|
||||
bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const;
|
||||
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
|
||||
void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;
|
||||
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
|
||||
|
||||
StringRef getPassName() const override { return "SI Peephole SDWA"; }
|
||||
|
||||
@ -855,7 +855,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
|
||||
}
|
||||
|
||||
bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
|
||||
const SISubtarget &ST) const {
|
||||
const GCNSubtarget &ST) const {
|
||||
// Check if this is already an SDWA instruction
|
||||
unsigned Opc = MI.getOpcode();
|
||||
if (TII->isSDWA(Opc))
|
||||
@ -1082,7 +1082,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
|
||||
// If an instruction was converted to SDWA it should not have immediates or SGPR
|
||||
// operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
|
||||
void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
|
||||
const SISubtarget &ST) const {
|
||||
const GCNSubtarget &ST) const {
|
||||
const MCInstrDesc &Desc = TII->get(MI.getOpcode());
|
||||
unsigned ConstantBusCount = 0;
|
||||
for (MachineOperand &Op : MI.explicit_uses()) {
|
||||
@ -1113,7 +1113,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
|
||||
}
|
||||
|
||||
bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
@ -56,7 +56,7 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
|
||||
cl::ReallyHidden,
|
||||
cl::init(true));
|
||||
|
||||
SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
|
||||
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
|
||||
AMDGPURegisterInfo(),
|
||||
SGPRPressureSets(getNumRegPressureSets()),
|
||||
VGPRPressureSets(getNumRegPressureSets()),
|
||||
@ -106,7 +106,7 @@ SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
|
||||
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
|
||||
const MachineFunction &MF) const {
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
|
||||
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
|
||||
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
||||
@ -131,7 +131,7 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
|
||||
|
||||
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
|
||||
const MachineFunction &MF) const {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
|
||||
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
|
||||
}
|
||||
@ -173,7 +173,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
|
||||
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
|
||||
@ -253,7 +253,7 @@ bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
|
||||
// create a virtual register for it during frame index elimination, so the
|
||||
// scavenger is directly needed.
|
||||
return MF.getFrameInfo().hasStackObjects() &&
|
||||
MF.getSubtarget<SISubtarget>().hasScalarStores() &&
|
||||
MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
|
||||
MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
|
||||
}
|
||||
|
||||
@ -308,7 +308,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
||||
DL = Ins->getDebugLoc();
|
||||
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = Subtarget.getInstrInfo();
|
||||
|
||||
if (Offset == 0) {
|
||||
@ -337,7 +337,7 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
|
||||
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = Subtarget.getInstrInfo();
|
||||
|
||||
#ifndef NDEBUG
|
||||
@ -524,7 +524,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
RegScavenger *RS) const {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
|
||||
@ -647,7 +647,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
||||
return false;
|
||||
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
unsigned SuperReg = MI->getOperand(0).getReg();
|
||||
@ -825,7 +825,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
||||
return false;
|
||||
|
||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
|
||||
@ -985,7 +985,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
|
||||
@ -1527,7 +1527,7 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
|
||||
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
|
||||
MachineFunction &MF) const {
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
|
||||
|
@ -21,10 +21,9 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
class GCNSubtarget;
|
||||
class LiveIntervals;
|
||||
class MachineRegisterInfo;
|
||||
class SISubtarget;
|
||||
class SIMachineFunctionInfo;
|
||||
|
||||
class SIRegisterInfo final : public AMDGPURegisterInfo {
|
||||
@ -39,7 +38,7 @@ private:
|
||||
void classifyPressureSet(unsigned PSetID, unsigned Reg,
|
||||
BitVector &PressureSets) const;
|
||||
public:
|
||||
SIRegisterInfo(const SISubtarget &ST);
|
||||
SIRegisterInfo(const GCNSubtarget &ST);
|
||||
|
||||
bool spillSGPRToVGPR() const {
|
||||
return SpillSGPRToVGPR;
|
||||
|
@ -292,7 +292,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
|
||||
|
@ -849,7 +849,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
||||
LowerToCopyInstrs.clear();
|
||||
CallingConv = MF.getFunction().getCallingConv();
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
|
Loading…
Reference in New Issue
Block a user