From f3ae5b9b8c4e97b02b68b4d2fdf578a70f7ea620 Mon Sep 17 00:00:00 2001 From: dfukalov Date: Wed, 20 Jan 2021 15:48:02 +0300 Subject: [PATCH] [NFC][AMDGPU] Split AMDGPUSubtarget.h to R600 and GCN subtargets ... to reduce headers dependency. Reviewed By: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D95036 --- .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 2 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 +- lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 2 +- lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 1 - lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 1 - .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 2 +- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 1 - lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 1 - lib/Target/AMDGPU/AMDGPULibCalls.cpp | 2 +- lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 1 + .../AMDGPU/AMDGPULowerKernelArguments.cpp | 2 +- .../AMDGPU/AMDGPULowerKernelAttributes.cpp | 1 - lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 2 +- .../AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 2 +- lib/Target/AMDGPU/AMDGPUMacroFusion.cpp | 4 +- .../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 3 +- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 2 +- .../AMDGPU/AMDGPUPropagateAttributes.cpp | 6 +- lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 3 +- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 5 + lib/Target/AMDGPU/AMDGPUSubtarget.h | 1194 +---------------- lib/Target/AMDGPU/AMDGPUTargetMachine.h | 3 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 25 + lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 41 +- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 3 +- lib/Target/AMDGPU/GCNDPPCombine.cpp | 3 +- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 3 +- lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 1 - lib/Target/AMDGPU/GCNNSAReassign.cpp | 2 +- lib/Target/AMDGPU/GCNRegBankReassign.cpp | 2 +- lib/Target/AMDGPU/GCNRegPressure.cpp | 1 - 
lib/Target/AMDGPU/GCNRegPressure.h | 2 +- lib/Target/AMDGPU/GCNSchedStrategy.cpp | 1 - lib/Target/AMDGPU/GCNSubtarget.h | 1059 +++++++++++++++ .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 2 +- lib/Target/AMDGPU/R600AsmPrinter.cpp | 3 +- lib/Target/AMDGPU/R600ClauseMergePass.cpp | 3 +- .../AMDGPU/R600ControlFlowFinalizer.cpp | 3 +- lib/Target/AMDGPU/R600EmitClauseMarkers.cpp | 3 +- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 3 +- lib/Target/AMDGPU/R600FrameLowering.cpp | 2 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 2 +- lib/Target/AMDGPU/R600InstrInfo.cpp | 3 +- lib/Target/AMDGPU/R600MachineScheduler.cpp | 3 +- .../AMDGPU/R600OptimizeVectorRegisters.cpp | 3 +- lib/Target/AMDGPU/R600Packetizer.cpp | 3 +- lib/Target/AMDGPU/R600RegisterInfo.cpp | 3 +- lib/Target/AMDGPU/R600Subtarget.h | 174 +++ lib/Target/AMDGPU/SIAddIMGInit.cpp | 3 +- lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 2 +- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 3 +- lib/Target/AMDGPU/SIFixVGPRCopies.cpp | 3 +- lib/Target/AMDGPU/SIFoldOperands.cpp | 3 +- lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 1 - lib/Target/AMDGPU/SIFrameLowering.cpp | 3 +- lib/Target/AMDGPU/SIISelLowering.cpp | 1 - lib/Target/AMDGPU/SIInsertHardClauses.cpp | 3 +- lib/Target/AMDGPU/SIInsertSkips.cpp | 3 +- lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 3 +- lib/Target/AMDGPU/SIInstrInfo.cpp | 3 +- lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 3 +- lib/Target/AMDGPU/SILowerControlFlow.cpp | 3 +- lib/Target/AMDGPU/SILowerI1Copies.cpp | 3 +- lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 3 +- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 - lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 3 +- lib/Target/AMDGPU/SIModeRegister.cpp | 3 +- lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 3 +- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 3 +- lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 3 +- lib/Target/AMDGPU/SIPostRABundler.cpp | 2 +- lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 3 +- lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 3 +- 
lib/Target/AMDGPU/SIRegisterInfo.cpp | 4 +- .../AMDGPU/SIRemoveShortExecBranches.cpp | 3 +- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 3 +- lib/Target/AMDGPU/SIWholeQuadMode.cpp | 3 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 3 +- .../AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp | 3 +- 82 files changed, 1389 insertions(+), 1298 deletions(-) create mode 100644 lib/Target/AMDGPU/GCNSubtarget.h create mode 100644 lib/Target/AMDGPU/R600Subtarget.h diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 7590776b813..a4e72f78723 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 886a5115d6a..c655e5ec87b 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPU.h" #include "AMDGPUHSAMetadataStreamer.h" -#include "AMDGPUSubtarget.h" #include "AMDKernelCodeT.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600AsmPrinter.h" diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 1ccb4d5c550..aae2a54c198 100644 --- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include 
"llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index b86052e3a14..852a05b3c18 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -15,7 +15,6 @@ #include "AMDGPUCallLowering.h" #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index b867317f579..315f090d29f 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 72301b21e84..c7a9f8fa428 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -14,7 +14,7 @@ #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIMachineFunctionInfo.h" #include "SIProgramInfo.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 59440b21f82..3c66745c0e7 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include 
"SIMachineFunctionInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ad4adf48305..0b4b4776ad3 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -16,7 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 9c86abfa5bf..06aa0055e4b 100644 --- a/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -16,6 +16,8 @@ #include "AMDGPUInstrInfo.h" #include "AMDGPUTargetTransformInfo.h" +#include "GCNSubtarget.h" +#include "R600Subtarget.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 2fae3d69a70..7255a061b26 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -16,7 +16,6 @@ #include "AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterBankInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 00ffa339062..6b7f57252b7 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -13,7 +13,7 @@ #include "AMDGPU.h" #include "AMDGPULibFunc.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Loads.h" 
#include "llvm/IR/IntrinsicsAMDGPU.h" diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index a554c27a77a..714e74faaf1 100644 --- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 394c66a022f..8fb4f93fd4b 100644 --- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/MDBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 04d6c1090ea..9ab6a5246ce 100644 --- a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUTargetMachine.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 3d2ac2c49ce..a8cba3f5cc5 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -13,9 +13,9 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" 
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index cf991a16307..b6a69b2819e 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp index 00bede2be36..c15c94ee17f 100644 --- a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp +++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMacroFusion.h" -#include "AMDGPUSubtarget.h" - +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" #include "llvm/CodeGen/MacroFusion.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 33f775e0dee..09e2c762abd 100644 --- a/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -13,7 +13,8 @@ #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 1dabcea0f7a..2a6ea838efc 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ 
b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index 9095715f923..cd71c7a16c7 100644 --- a/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -27,11 +27,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" + #define DEBUG_TYPE "amdgpu-propagate-attributes" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index c073533702c..d644c031928 100644 --- a/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -13,8 +13,7 @@ #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" -#include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 17b1458ddf7..502356d4f9a 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -73,7 +73,7 @@ #include "AMDGPU.h" #include 
"AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index cf48e0bcf45..174aac5abb5 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -604,6 +604,11 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, return alignTo(TotalSize, 4); } +AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const { + return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 + : AMDGPUDwarfFlavour::Wave64; +} + R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 5955d14e809..ba3a8acae55 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1,4 +1,4 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// +//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,46 +7,24 @@ //==-----------------------------------------------------------------------===// // /// \file -/// AMDGPU specific subclass of TargetSubtarget. +/// Base class for AMDGPU specific classes of TargetSubtarget. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H -#include "AMDGPUCallLowering.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "R600FrameLowering.h" -#include "R600ISelLowering.h" -#include "R600InstrInfo.h" -#include "SIFrameLowering.h" -#include "SIISelLowering.h" -#include "SIInstrInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/Alignment.h" namespace llvm { -class MCInst; -class MCInstrInfo; - -} // namespace llvm - -#define GET_SUBTARGETINFO_HEADER -#include "AMDGPUGenSubtargetInfo.inc" -#define GET_SUBTARGETINFO_HEADER -#include "R600GenSubtargetInfo.inc" - -namespace llvm { - +enum AMDGPUDwarfFlavour : unsigned; class Function; class Instruction; class MachineFunction; -class StringRef; class TargetMachine; -class GCNTargetMachine; class AMDGPUSubtarget { public: @@ -254,1175 +232,11 @@ public: /// \returns Corresponsing DWARF register number mapping flavour for the /// \p WavefrontSize. - AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const { - return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 - : AMDGPUDwarfFlavour::Wave64; - } + AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; virtual ~AMDGPUSubtarget() {} }; -class GCNSubtarget : public AMDGPUGenSubtargetInfo, - public AMDGPUSubtarget { - - using AMDGPUSubtarget::getMaxWavesPerEU; - -public: - enum TrapHandlerAbi { - TrapHandlerAbiNone = 0, - TrapHandlerAbiHsa = 1 - }; - - enum TrapID { - TrapIDHardwareReserved = 0, - TrapIDHSADebugTrap = 1, - TrapIDLLVMTrap = 2, - TrapIDLLVMDebugTrap = 3, - TrapIDDebugBreakpoint = 7, - TrapIDDebugReserved8 = 8, - TrapIDDebugReservedFE = 0xfe, - TrapIDDebugReservedFF = 0xff - }; - - enum TrapRegValues { - LLVMTrapHandlerRegValue = 1 - }; - -private: - /// GlobalISel related APIs. 
- std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; - std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - -protected: - // Basic subtarget description. - Triple TargetTriple; - unsigned Gen; - InstrItineraryData InstrItins; - int LDSBankCount; - unsigned MaxPrivateElementSize; - - // Possibly statically set by tablegen, but may want to be overridden. - bool FastFMAF32; - bool FastDenormalF32; - bool HalfRate64Ops; - - // Dynamically set bits that enable features. - bool FlatForGlobal; - bool AutoWaitcntBeforeBarrier; - bool UnalignedScratchAccess; - bool UnalignedAccessMode; - bool HasApertureRegs; - bool EnableXNACK; - bool DoesNotSupportXNACK; - bool EnableCuMode; - bool TrapHandler; - - // Used as options. - bool EnableLoadStoreOpt; - bool EnableUnsafeDSOffsetFolding; - bool EnableSIScheduler; - bool EnableDS128; - bool EnablePRTStrictNull; - bool DumpCode; - - // Subtarget statically properties set by tablegen - bool FP64; - bool FMA; - bool MIMG_R128; - bool IsGCN; - bool GCN3Encoding; - bool CIInsts; - bool GFX8Insts; - bool GFX9Insts; - bool GFX10Insts; - bool GFX10_3Insts; - bool GFX7GFX8GFX9Insts; - bool SGPRInitBug; - bool HasSMemRealTime; - bool HasIntClamp; - bool HasFmaMixInsts; - bool HasMovrel; - bool HasVGPRIndexMode; - bool HasScalarStores; - bool HasScalarAtomics; - bool HasSDWAOmod; - bool HasSDWAScalar; - bool HasSDWASdst; - bool HasSDWAMac; - bool HasSDWAOutModsVOPC; - bool HasDPP; - bool HasDPP8; - bool HasR128A16; - bool HasGFX10A16; - bool HasG16; - bool HasNSAEncoding; - bool GFX10_BEncoding; - bool HasDLInsts; - bool HasDot1Insts; - bool HasDot2Insts; - bool HasDot3Insts; - bool HasDot4Insts; - bool HasDot5Insts; - bool HasDot6Insts; - bool HasMAIInsts; - bool HasPkFmacF16Inst; - bool HasAtomicFaddInsts; - bool EnableSRAMECC; - bool DoesNotSupportSRAMECC; - bool HasNoSdstCMPX; - bool HasVscnt; - bool HasGetWaveIdInst; - bool HasSMemTimeInst; - bool HasRegisterBanking; - bool 
HasVOP3Literal; - bool HasNoDataDepHazard; - bool FlatAddressSpace; - bool FlatInstOffsets; - bool FlatGlobalInsts; - bool FlatScratchInsts; - bool ScalarFlatScratchInsts; - bool AddNoCarryInsts; - bool HasUnpackedD16VMem; - bool R600ALUInst; - bool CaymanISA; - bool CFALUBug; - bool LDSMisalignedBug; - bool HasMFMAInlineLiteralBug; - bool HasVertexCache; - short TexVTXClauseSize; - bool UnalignedBufferAccess; - bool UnalignedDSAccess; - bool ScalarizeGlobal; - - bool HasVcmpxPermlaneHazard; - bool HasVMEMtoScalarWriteHazard; - bool HasSMEMtoVectorWriteHazard; - bool HasInstFwdPrefetchBug; - bool HasVcmpxExecWARHazard; - bool HasLdsBranchVmemWARHazard; - bool HasNSAtoVMEMBug; - bool HasOffset3fBug; - bool HasFlatSegmentOffsetBug; - bool HasImageStoreD16Bug; - bool HasImageGather4D16Bug; - - // Dummy feature to use for assembler in tablegen. - bool FeatureDisable; - - SelectionDAGTargetInfo TSInfo; -private: - SIInstrInfo InstrInfo; - SITargetLowering TLInfo; - SIFrameLowering FrameLowering; - -public: - // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 
- static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); - - GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM); - ~GCNSubtarget() override; - - GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); - - const SIInstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const SIFrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const SITargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const SIRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InlineAsmLowering *getInlineAsmLowering() const override { - return InlineAsmLoweringInfo.get(); - } - - InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } - - // Nothing implemented, just prevent crashes on use. - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - - Generation getGeneration() const { - return (Generation)Gen; - } - - /// Return the number of high bits known to be zero fror a frame index. - unsigned getKnownHighZeroBitsForFrameIndex() const { - return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); - } - - int getLDSBankCount() const { - return LDSBankCount; - } - - unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { - return (ForBufferRSrc || !enableFlatScratch()) ? 
MaxPrivateElementSize : 16; - } - - unsigned getConstantBusLimit(unsigned Opcode) const; - - bool hasIntClamp() const { - return HasIntClamp; - } - - bool hasFP64() const { - return FP64; - } - - bool hasMIMG_R128() const { - return MIMG_R128; - } - - bool hasHWFP64() const { - return FP64; - } - - bool hasFastFMAF32() const { - return FastFMAF32; - } - - bool hasHalfRate64Ops() const { - return HalfRate64Ops; - } - - bool hasAddr64() const { - return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); - } - - bool hasFlat() const { - return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); - } - - // Return true if the target only has the reverse operand versions of VALU - // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). - bool hasOnlyRevVALUShifts() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasFractBug() const { - return getGeneration() == SOUTHERN_ISLANDS; - } - - bool hasBFE() const { - return true; - } - - bool hasBFI() const { - return true; - } - - bool hasBFM() const { - return hasBFE(); - } - - bool hasBCNT(unsigned Size) const { - return true; - } - - bool hasFFBL() const { - return true; - } - - bool hasFFBH() const { - return true; - } - - bool hasMed3_16() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - bool hasMin3Max3_16() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - bool hasFmaMixInsts() const { - return HasFmaMixInsts; - } - - bool hasCARRY() const { - return true; - } - - bool hasFMA() const { - return FMA; - } - - bool hasSwap() const { - return GFX9Insts; - } - - bool hasScalarPackInsts() const { - return GFX9Insts; - } - - bool hasScalarMulHiInsts() const { - return GFX9Insts; - } - - TrapHandlerAbi getTrapHandlerAbi() const { - return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; - } - - /// True if the offset field of DS instructions works as expected. On SI, the - /// offset uses a 16-bit adder and does not always wrap properly. 
- bool hasUsableDSOffset() const { - return getGeneration() >= SEA_ISLANDS; - } - - bool unsafeDSOffsetFoldingEnabled() const { - return EnableUnsafeDSOffsetFolding; - } - - /// Condition output from div_scale is usable. - bool hasUsableDivScaleConditionOutput() const { - return getGeneration() != SOUTHERN_ISLANDS; - } - - /// Extra wait hazard is needed in some cases before - /// s_cbranch_vccnz/s_cbranch_vccz. - bool hasReadVCCZBug() const { - return getGeneration() <= SEA_ISLANDS; - } - - /// Writes to VCC_LO/VCC_HI update the VCCZ flag. - bool partialVCCWritesUpdateVCCZ() const { - return getGeneration() >= GFX10; - } - - /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR - /// was written by a VALU instruction. - bool hasSMRDReadVALUDefHazard() const { - return getGeneration() == SOUTHERN_ISLANDS; - } - - /// A read of an SGPR by a VMEM instruction requires 5 wait states when the - /// SGPR was written by a VALU Instruction. - bool hasVMEMReadSGPRVALUDefHazard() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasRFEHazards() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. - unsigned getSetRegWaitStates() const { - return getGeneration() <= SEA_ISLANDS ? 1 : 2; - } - - bool dumpCode() const { - return DumpCode; - } - - /// Return the amount of LDS that can be used that will not restrict the - /// occupancy lower than WaveCount. - unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, - const Function &) const; - - bool supportsMinMaxDenormModes() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - /// \returns If target supports S_DENORM_MODE. - bool hasDenormModeInst() const { - return getGeneration() >= AMDGPUSubtarget::GFX10; - } - - bool useFlatForGlobal() const { - return FlatForGlobal; - } - - /// \returns If target supports ds_read/write_b128 and user enables generation - /// of ds_read/write_b128. 
- bool useDS128() const { - return CIInsts && EnableDS128; - } - - /// \return If target supports ds_read/write_b96/128. - bool hasDS96AndDS128() const { - return CIInsts; - } - - /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 - bool haveRoundOpsF64() const { - return CIInsts; - } - - /// \returns If MUBUF instructions always perform range checking, even for - /// buffer resources used for private memory access. - bool privateMemoryResourceIsRangeChecked() const { - return getGeneration() < AMDGPUSubtarget::GFX9; - } - - /// \returns If target requires PRT Struct NULL support (zero result registers - /// for sparse texture support). - bool usePRTStrictNull() const { - return EnablePRTStrictNull; - } - - bool hasAutoWaitcntBeforeBarrier() const { - return AutoWaitcntBeforeBarrier; - } - - bool hasUnalignedBufferAccess() const { - return UnalignedBufferAccess; - } - - bool hasUnalignedBufferAccessEnabled() const { - return UnalignedBufferAccess && UnalignedAccessMode; - } - - bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; - } - - bool hasUnalignedDSAccessEnabled() const { - return UnalignedDSAccess && UnalignedAccessMode; - } - - bool hasUnalignedScratchAccess() const { - return UnalignedScratchAccess; - } - - bool hasUnalignedAccessMode() const { - return UnalignedAccessMode; - } - - bool hasApertureRegs() const { - return HasApertureRegs; - } - - bool isTrapHandlerEnabled() const { - return TrapHandler; - } - - bool isXNACKEnabled() const { - return EnableXNACK; - } - - bool isCuModeEnabled() const { - return EnableCuMode; - } - - bool hasFlatAddressSpace() const { - return FlatAddressSpace; - } - - bool hasFlatScrRegister() const { - return hasFlatAddressSpace(); - } - - bool hasFlatInstOffsets() const { - return FlatInstOffsets; - } - - bool hasFlatGlobalInsts() const { - return FlatGlobalInsts; - } - - bool hasFlatScratchInsts() const { - return FlatScratchInsts; - } - - // Check if target supports ST addressing mode with FLAT scratch 
instructions. - // The ST addressing mode means no registers are used, either VGPR or SGPR, - // but only immediate offset is swizzled and added to the FLAT scratch base. - bool hasFlatScratchSTMode() const { - return hasFlatScratchInsts() && hasGFX10_3Insts(); - } - - bool hasScalarFlatScratchInsts() const { - return ScalarFlatScratchInsts; - } - - bool hasGlobalAddTidInsts() const { - return GFX10_BEncoding; - } - - bool hasAtomicCSub() const { - return GFX10_BEncoding; - } - - bool hasMultiDwordFlatScratchAddressing() const { - return getGeneration() >= GFX9; - } - - bool hasFlatSegmentOffsetBug() const { - return HasFlatSegmentOffsetBug; - } - - bool hasFlatLgkmVMemCountInOrder() const { - return getGeneration() > GFX9; - } - - bool hasD16LoadStore() const { - return getGeneration() >= GFX9; - } - - bool d16PreservesUnusedBits() const { - return hasD16LoadStore() && !isSRAMECCEnabled(); - } - - bool hasD16Images() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - /// Return if most LDS instructions have an m0 use that require m0 to be - /// iniitalized. - bool ldsRequiresM0Init() const { - return getGeneration() < GFX9; - } - - // True if the hardware rewinds and replays GWS operations if a wave is - // preempted. - // - // If this is false, a GWS operation requires testing if a nack set the - // MEM_VIOL bit, and repeating if so. - bool hasGWSAutoReplay() const { - return getGeneration() >= GFX9; - } - - /// \returns if target has ds_gws_sema_release_all instruction. - bool hasGWSSemaReleaseAll() const { - return CIInsts; - } - - /// \returns true if the target has integer add/sub instructions that do not - /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, - /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier - /// for saturation. 
- bool hasAddNoCarry() const { - return AddNoCarryInsts; - } - - bool hasUnpackedD16VMem() const { - return HasUnpackedD16VMem; - } - - // Covers VS/PS/CS graphics shaders - bool isMesaGfxShader(const Function &F) const { - return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); - } - - bool hasMad64_32() const { - return getGeneration() >= SEA_ISLANDS; - } - - bool hasSDWAOmod() const { - return HasSDWAOmod; - } - - bool hasSDWAScalar() const { - return HasSDWAScalar; - } - - bool hasSDWASdst() const { - return HasSDWASdst; - } - - bool hasSDWAMac() const { - return HasSDWAMac; - } - - bool hasSDWAOutModsVOPC() const { - return HasSDWAOutModsVOPC; - } - - bool hasDLInsts() const { - return HasDLInsts; - } - - bool hasDot1Insts() const { - return HasDot1Insts; - } - - bool hasDot2Insts() const { - return HasDot2Insts; - } - - bool hasDot3Insts() const { - return HasDot3Insts; - } - - bool hasDot4Insts() const { - return HasDot4Insts; - } - - bool hasDot5Insts() const { - return HasDot5Insts; - } - - bool hasDot6Insts() const { - return HasDot6Insts; - } - - bool hasMAIInsts() const { - return HasMAIInsts; - } - - bool hasPkFmacF16Inst() const { - return HasPkFmacF16Inst; - } - - bool hasAtomicFaddInsts() const { - return HasAtomicFaddInsts; - } - - bool isSRAMECCEnabled() const { - return EnableSRAMECC; - } - - bool hasNoSdstCMPX() const { - return HasNoSdstCMPX; - } - - bool hasVscnt() const { - return HasVscnt; - } - - bool hasGetWaveIdInst() const { - return HasGetWaveIdInst; - } - - bool hasSMemTimeInst() const { - return HasSMemTimeInst; - } - - bool hasRegisterBanking() const { - return HasRegisterBanking; - } - - bool hasVOP3Literal() const { - return HasVOP3Literal; - } - - bool hasNoDataDepHazard() const { - return HasNoDataDepHazard; - } - - bool vmemWriteNeedsExpWaitcnt() const { - return getGeneration() < SEA_ISLANDS; - } - - // Scratch is allocated in 256 dword per wave blocks for the entire - // wavefront. 
When viewed from the perspecive of an arbitrary workitem, this - // is 4-byte aligned. - // - // Only 4-byte alignment is really needed to access anything. Transformations - // on the pointer value itself may rely on the alignment / known low bits of - // the pointer. Set this to something above the minimum to avoid needing - // dynamic realignment in common cases. - Align getStackAlignment() const { return Align(16); } - - bool enableMachineScheduler() const override { - return true; - } - - bool useAA() const override; - - bool enableSubRegLiveness() const override { - return true; - } - - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } - bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } - - // static wrappers - static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); - - // XXX - Why is this here if it isn't in the default pass set? - bool enableEarlyIfConversion() const override { - return true; - } - - bool enableFlatScratch() const; - - void overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; - - unsigned getMaxNumUserSGPRs() const { - return 16; - } - - bool hasSMemRealTime() const { - return HasSMemRealTime; - } - - bool hasMovrel() const { - return HasMovrel; - } - - bool hasVGPRIndexMode() const { - return HasVGPRIndexMode; - } - - bool useVGPRIndexMode() const; - - bool hasScalarCompareEq64() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasScalarStores() const { - return HasScalarStores; - } - - bool hasScalarAtomics() const { - return HasScalarAtomics; - } - - bool hasLDSFPAtomics() const { - return GFX8Insts; - } - - bool hasDPP() const { - return HasDPP; - } - - bool hasDPPBroadcasts() const { - return HasDPP && getGeneration() < GFX10; - } - - bool hasDPPWavefrontShifts() const { - return HasDPP && getGeneration() < GFX10; - } - - bool hasDPP8() const { - return HasDPP8; - } - - bool hasR128A16() const { - return HasR128A16; - } - - bool 
hasGFX10A16() const { - return HasGFX10A16; - } - - bool hasA16() const { return hasR128A16() || hasGFX10A16(); } - - bool hasG16() const { return HasG16; } - - bool hasOffset3fBug() const { - return HasOffset3fBug; - } - - bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } - - bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } - - bool hasNSAEncoding() const { return HasNSAEncoding; } - - bool hasGFX10_BEncoding() const { - return GFX10_BEncoding; - } - - bool hasGFX10_3Insts() const { - return GFX10_3Insts; - } - - bool hasMadF16() const; - - bool enableSIScheduler() const { - return EnableSIScheduler; - } - - bool loadStoreOptEnabled() const { - return EnableLoadStoreOpt; - } - - bool hasSGPRInitBug() const { - return SGPRInitBug; - } - - bool hasMFMAInlineLiteralBug() const { - return HasMFMAInlineLiteralBug; - } - - bool has12DWordStoreHazard() const { - return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; - } - - // \returns true if the subtarget supports DWORDX3 load/store instructions. 
- bool hasDwordx3LoadStores() const { - return CIInsts; - } - - bool hasReadM0MovRelInterpHazard() const { - return getGeneration() == AMDGPUSubtarget::GFX9; - } - - bool hasReadM0SendMsgHazard() const { - return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && - getGeneration() <= AMDGPUSubtarget::GFX9; - } - - bool hasVcmpxPermlaneHazard() const { - return HasVcmpxPermlaneHazard; - } - - bool hasVMEMtoScalarWriteHazard() const { - return HasVMEMtoScalarWriteHazard; - } - - bool hasSMEMtoVectorWriteHazard() const { - return HasSMEMtoVectorWriteHazard; - } - - bool hasLDSMisalignedBug() const { - return LDSMisalignedBug && !EnableCuMode; - } - - bool hasInstFwdPrefetchBug() const { - return HasInstFwdPrefetchBug; - } - - bool hasVcmpxExecWARHazard() const { - return HasVcmpxExecWARHazard; - } - - bool hasLdsBranchVmemWARHazard() const { - return HasLdsBranchVmemWARHazard; - } - - bool hasNSAtoVMEMBug() const { - return HasNSAtoVMEMBug; - } - - bool hasHardClauses() const { return getGeneration() >= GFX10; } - - /// Return the maximum number of waves per SIMD for kernels using \p SGPRs - /// SGPRs - unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; - - /// Return the maximum number of waves per SIMD for kernels using \p VGPRs - /// VGPRs - unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; - - /// Return occupancy for the given function. Used LDS and a number of - /// registers if provided. - /// Note, occupancy can be affected by the scratch allocation as well, but - /// we do not have enough information to compute it. - unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, - unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; - - /// \returns true if the flat_scratch register should be initialized with the - /// pointer to the wave's scratch memory rather than a size and offset. 
- bool flatScratchIsPointer() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - /// \returns true if the machine has merged shaders in which s0-s7 are - /// reserved by the hardware and user SGPRs start at s8 - bool hasMergedShaders() const { - return getGeneration() >= GFX9; - } - - /// \returns SGPR allocation granularity supported by the subtarget. - unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule(this); - } - - /// \returns SGPR encoding granularity supported by the subtarget. - unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule(this); - } - - /// \returns Total number of SGPRs supported by the subtarget. - unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(this); - } - - /// \returns Addressable number of SGPRs supported by the subtarget. - unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); - } - - /// \returns Minimum number of SGPRs that meets the given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of SGPRs that meets the given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); - } - - /// \returns Reserved number of SGPRs for given function \p MF. - unsigned getReservedNumSGPRs(const MachineFunction &MF) const; - - /// \returns Maximum number of SGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of SGPRs explicitly - /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 
- /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumSGPRs(const MachineFunction &MF) const; - - /// \returns VGPR allocation granularity supported by the subtarget. - unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(this); - } - - /// \returns VGPR encoding granularity supported by the subtarget. - unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule(this); - } - - /// \returns Total number of VGPRs supported by the subtarget. - unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(this); - } - - /// \returns Addressable number of VGPRs supported by the subtarget. - unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); - } - - /// \returns Minimum number of VGPRs that meets given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of VGPRs that meets given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of VGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of VGPRs explicitly - /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 
- /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumVGPRs(const MachineFunction &MF) const; - - void getPostRAMutations( - std::vector> &Mutations) - const override; - - bool isWave32() const { - return getWavefrontSize() == 32; - } - - bool isWave64() const { - return getWavefrontSize() == 64; - } - - const TargetRegisterClass *getBoolRC() const { - return getRegisterInfo()->getBoolRC(); - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. 
- unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } - - void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, - SDep &Dep) const override; -}; - -class R600Subtarget final : public R600GenSubtargetInfo, - public AMDGPUSubtarget { -private: - R600InstrInfo InstrInfo; - R600FrameLowering FrameLowering; - bool FMA; - bool CaymanISA; - bool CFALUBug; - bool HasVertexCache; - bool R600ALUInst; - bool FP64; - short TexVTXClauseSize; - Generation Gen; - R600TargetLowering TLInfo; - InstrItineraryData InstrItins; - SelectionDAGTargetInfo TSInfo; - -public: - R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const TargetMachine &TM); - - const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } - - const R600FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const R600TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const R600RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; - } - - // Nothing implemented, just prevent crashes on use. 
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - - Generation getGeneration() const { - return Gen; - } - - Align getStackAlignment() const { return Align(4); } - - R600Subtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); - - bool hasBFE() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBFI() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBCNT(unsigned Size) const { - if (Size == 32) - return (getGeneration() >= EVERGREEN); - - return false; - } - - bool hasBORROW() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCARRY() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCaymanISA() const { - return CaymanISA; - } - - bool hasFFBL() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFFBH() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFMA() const { return FMA; } - - bool hasCFAluBug() const { return CFALUBug; } - - bool hasVertexCache() const { return HasVertexCache; } - - short getTexVTXClauseSize() const { return TexVTXClauseSize; } - - bool enableMachineScheduler() const override { - return true; - } - - bool enableSubRegLiveness() const override { - return true; - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. 
- unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } -}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index e6fa0e25d21..95aefa23c24 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -14,7 +14,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "R600Subtarget.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 79c8003f0c9..46156dc4bdc 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -239,6 +239,26 @@ void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getPeelingPreferences(L, SE, PP); } +const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = { + // Codegen control options which don't matter. 
+ AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler, + AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, + AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess, + AMDGPU::FeatureUnalignedAccessMode, + + AMDGPU::FeatureAutoWaitcntBeforeBarrier, + + // Property of the kernel/environment which can't actually differ. + AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK, + AMDGPU::FeatureTrapHandler, + + // The default assumption needs to be ecc is enabled, but no directly + // exposed operations depend on it, so it can be safely inlined. + AMDGPU::FeatureSRAMECC, + + // Perf-tuning features + AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops}; + GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), @@ -1113,6 +1133,11 @@ void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getPeelingPreferences(L, SE, PP); } +int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const { + return ST->hasHalfRate64Ops() ? 
getHalfRateInstrCost(CostKind) + : getQuarterRateInstrCost(CostKind); +} + R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index c5c2cd651a3..4fc4cf22952 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -19,14 +19,18 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/BasicTTIImpl.h" namespace llvm { class AMDGPUTargetLowering; +class GCNSubtarget; class InstCombiner; class Loop; +class R600Subtarget; class ScalarEvolution; +class SITargetLowering; class Type; class Value; @@ -38,7 +42,7 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { Triple TargetTriple; - const GCNSubtarget *ST; + const TargetSubtargetInfo *ST; const TargetLoweringBase *TLI; const TargetSubtargetInfo *getST() const { return ST; } @@ -68,34 +72,10 @@ class GCNTTIImpl final : public BasicTTIImplBase { bool HasFP64FP16Denormals; unsigned MaxVGPRs; - const FeatureBitset InlineFeatureIgnoreList = { - // Codegen control options which don't matter. - AMDGPU::FeatureEnableLoadStoreOpt, - AMDGPU::FeatureEnableSIScheduler, - AMDGPU::FeatureEnableUnsafeDSOffsetFolding, - AMDGPU::FeatureFlatForGlobal, - AMDGPU::FeaturePromoteAlloca, - AMDGPU::FeatureUnalignedScratchAccess, - AMDGPU::FeatureUnalignedAccessMode, - - AMDGPU::FeatureAutoWaitcntBeforeBarrier, - - // Property of the kernel/environment which can't actually differ. - AMDGPU::FeatureSGPRInitBug, - AMDGPU::FeatureXNACK, - AMDGPU::FeatureTrapHandler, - - // The default assumption needs to be ecc is enabled, but no directly - // exposed operations depend on it, so it can be safely inlined. 
- AMDGPU::FeatureSRAMECC, - - // Perf-tuning features - AMDGPU::FeatureFastFMAF32, - AMDGPU::HalfRate64Ops - }; + static const FeatureBitset InlineFeatureIgnoreList; const GCNSubtarget *getST() const { return ST; } - const AMDGPUTargetLowering *getTLI() const { return TLI; } + const SITargetLowering *getTLI() const { return TLI; } static inline int getFullRateInstrCost() { return TargetTransformInfo::TCC_Basic; @@ -117,11 +97,8 @@ class GCNTTIImpl final : public BasicTTIImplBase { // On some parts, normal fp64 operations are half rate, and others // quarter. This also applies to some integer operations. - inline int get64BitInstrCost( - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const { - return ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind) - : getQuarterRateInstrCost(CostKind); - } + int get64BitInstrCost( + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; public: explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F); diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 3530c2b7b71..b9a8c6bd005 100644 --- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -7,8 +7,9 @@ //==-----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600RegisterInfo.h" +#include "R600Subtarget.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp index 6cc1e7daad1..e4eacd101ce 100644 --- a/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -38,7 +38,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include 
"MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 130f275519e..ed1dc77bd54 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,7 +11,8 @@ //===----------------------------------------------------------------------===// #include "GCNHazardRecognizer.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/TargetParser.h" diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index b66d64b55ce..f3f9eb53355 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNIterativeScheduler.h" -#include "AMDGPUSubtarget.h" #include "GCNSchedStrategy.h" #include "SIMachineFunctionInfo.h" diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp index aee26e02cbe..fc7105bc15a 100644 --- a/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervals.h" diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp index 9da3bf70367..a12e9ab03e1 100644 --- a/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -31,7 +31,7 @@ //===----------------------------------------------------------------------===// 
#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 7bac44476f0..aeec3e88632 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h index 1c3d4b9c17a..ba8c85aa502 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.h +++ b/lib/Target/AMDGPU/GCNRegPressure.h @@ -17,7 +17,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H #define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/LiveIntervals.h" #include diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 045cb56d4b9..6e2550298dc 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNSchedStrategy.h" -#include "AMDGPUSubtarget.h" #include "SIMachineFunctionInfo.h" #define DEBUG_TYPE "machine-scheduler" diff --git a/lib/Target/AMDGPU/GCNSubtarget.h b/lib/Target/AMDGPU/GCNSubtarget.h new file mode 100644 index 00000000000..a59b5cf6217 --- /dev/null +++ b/lib/Target/AMDGPU/GCNSubtarget.h @@ -0,0 +1,1059 @@ +//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// AMD GCN specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H + +#include "AMDGPUCallLowering.h" +#include "AMDGPUSubtarget.h" +#include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class MCInst; +class MCInstrInfo; + +} // namespace llvm + +#define GET_SUBTARGETINFO_HEADER +#include "AMDGPUGenSubtargetInfo.inc" + +namespace llvm { + +class GCNTargetMachine; + +class GCNSubtarget final : public AMDGPUGenSubtargetInfo, + public AMDGPUSubtarget { + + using AMDGPUSubtarget::getMaxWavesPerEU; + +public: + enum TrapHandlerAbi { + TrapHandlerAbiNone = 0, + TrapHandlerAbiHsa = 1 + }; + + enum TrapID { + TrapIDHardwareReserved = 0, + TrapIDHSADebugTrap = 1, + TrapIDLLVMTrap = 2, + TrapIDLLVMDebugTrap = 3, + TrapIDDebugBreakpoint = 7, + TrapIDDebugReserved8 = 8, + TrapIDDebugReservedFE = 0xfe, + TrapIDDebugReservedFF = 0xff + }; + + enum TrapRegValues { + LLVMTrapHandlerRegValue = 1 + }; + +private: + /// GlobalISel related APIs. + std::unique_ptr CallLoweringInfo; + std::unique_ptr InlineAsmLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + +protected: + // Basic subtarget description. + Triple TargetTriple; + unsigned Gen; + InstrItineraryData InstrItins; + int LDSBankCount; + unsigned MaxPrivateElementSize; + + // Possibly statically set by tablegen, but may want to be overridden. + bool FastFMAF32; + bool FastDenormalF32; + bool HalfRate64Ops; + + // Dynamically set bits that enable features. 
+ bool FlatForGlobal; + bool AutoWaitcntBeforeBarrier; + bool UnalignedScratchAccess; + bool UnalignedAccessMode; + bool HasApertureRegs; + bool EnableXNACK; + bool DoesNotSupportXNACK; + bool EnableCuMode; + bool TrapHandler; + + // Used as options. + bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; + bool EnableSIScheduler; + bool EnableDS128; + bool EnablePRTStrictNull; + bool DumpCode; + + // Subtarget statically properties set by tablegen + bool FP64; + bool FMA; + bool MIMG_R128; + bool GCN3Encoding; + bool CIInsts; + bool GFX8Insts; + bool GFX9Insts; + bool GFX10Insts; + bool GFX10_3Insts; + bool GFX7GFX8GFX9Insts; + bool SGPRInitBug; + bool HasSMemRealTime; + bool HasIntClamp; + bool HasFmaMixInsts; + bool HasMovrel; + bool HasVGPRIndexMode; + bool HasScalarStores; + bool HasScalarAtomics; + bool HasSDWAOmod; + bool HasSDWAScalar; + bool HasSDWASdst; + bool HasSDWAMac; + bool HasSDWAOutModsVOPC; + bool HasDPP; + bool HasDPP8; + bool HasR128A16; + bool HasGFX10A16; + bool HasG16; + bool HasNSAEncoding; + bool GFX10_BEncoding; + bool HasDLInsts; + bool HasDot1Insts; + bool HasDot2Insts; + bool HasDot3Insts; + bool HasDot4Insts; + bool HasDot5Insts; + bool HasDot6Insts; + bool HasMAIInsts; + bool HasPkFmacF16Inst; + bool HasAtomicFaddInsts; + bool EnableSRAMECC; + bool DoesNotSupportSRAMECC; + bool HasNoSdstCMPX; + bool HasVscnt; + bool HasGetWaveIdInst; + bool HasSMemTimeInst; + bool HasRegisterBanking; + bool HasVOP3Literal; + bool HasNoDataDepHazard; + bool FlatAddressSpace; + bool FlatInstOffsets; + bool FlatGlobalInsts; + bool FlatScratchInsts; + bool ScalarFlatScratchInsts; + bool AddNoCarryInsts; + bool HasUnpackedD16VMem; + bool LDSMisalignedBug; + bool HasMFMAInlineLiteralBug; + bool UnalignedBufferAccess; + bool UnalignedDSAccess; + bool ScalarizeGlobal; + + bool HasVcmpxPermlaneHazard; + bool HasVMEMtoScalarWriteHazard; + bool HasSMEMtoVectorWriteHazard; + bool HasInstFwdPrefetchBug; + bool HasVcmpxExecWARHazard; + bool 
HasLdsBranchVmemWARHazard; + bool HasNSAtoVMEMBug; + bool HasOffset3fBug; + bool HasFlatSegmentOffsetBug; + bool HasImageStoreD16Bug; + bool HasImageGather4D16Bug; + + // Dummy feature to use for assembler in tablegen. + bool FeatureDisable; + + SelectionDAGTargetInfo TSInfo; +private: + SIInstrInfo InstrInfo; + SITargetLowering TLInfo; + SIFrameLowering FrameLowering; + +public: + // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. + static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); + + GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const GCNTargetMachine &TM); + ~GCNSubtarget() override; + + GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + const SIInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + + const SIFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const SITargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const SIRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InlineAsmLowering *getInlineAsmLowering() const override { + return InlineAsmLoweringInfo.get(); + } + + InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } + + // Nothing implemented, just prevent crashes on use. 
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + Generation getGeneration() const { + return (Generation)Gen; + } + + /// Return the number of high bits known to be zero for a frame index. + unsigned getKnownHighZeroBitsForFrameIndex() const { + return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); + } + + int getLDSBankCount() const { + return LDSBankCount; + } + + unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { + return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; + } + + unsigned getConstantBusLimit(unsigned Opcode) const; + + bool hasIntClamp() const { + return HasIntClamp; + } + + bool hasFP64() const { + return FP64; + } + + bool hasMIMG_R128() const { + return MIMG_R128; + } + + bool hasHWFP64() const { + return FP64; + } + + bool hasFastFMAF32() const { + return FastFMAF32; + } + + bool hasHalfRate64Ops() const { + return HalfRate64Ops; + } + + bool hasAddr64() const { + return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); + } + + bool hasFlat() const { + return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); + } + + // Return true if the target only has the reverse operand versions of VALU + // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 
+ bool hasOnlyRevVALUShifts() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasFractBug() const { + return getGeneration() == SOUTHERN_ISLANDS; + } + + bool hasBFE() const { + return true; + } + + bool hasBFI() const { + return true; + } + + bool hasBFM() const { + return hasBFE(); + } + + bool hasBCNT(unsigned Size) const { + return true; + } + + bool hasFFBL() const { + return true; + } + + bool hasFFBH() const { + return true; + } + + bool hasMed3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + bool hasMin3Max3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + bool hasFmaMixInsts() const { + return HasFmaMixInsts; + } + + bool hasCARRY() const { + return true; + } + + bool hasFMA() const { + return FMA; + } + + bool hasSwap() const { + return GFX9Insts; + } + + bool hasScalarPackInsts() const { + return GFX9Insts; + } + + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + + TrapHandlerAbi getTrapHandlerAbi() const { + return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; + } + + /// True if the offset field of DS instructions works as expected. On SI, the + /// offset uses a 16-bit adder and does not always wrap properly. + bool hasUsableDSOffset() const { + return getGeneration() >= SEA_ISLANDS; + } + + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + + /// Condition output from div_scale is usable. + bool hasUsableDivScaleConditionOutput() const { + return getGeneration() != SOUTHERN_ISLANDS; + } + + /// Extra wait hazard is needed in some cases before + /// s_cbranch_vccnz/s_cbranch_vccz. + bool hasReadVCCZBug() const { + return getGeneration() <= SEA_ISLANDS; + } + + /// Writes to VCC_LO/VCC_HI update the VCCZ flag. + bool partialVCCWritesUpdateVCCZ() const { + return getGeneration() >= GFX10; + } + + /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR + /// was written by a VALU instruction. 
+ bool hasSMRDReadVALUDefHazard() const { + return getGeneration() == SOUTHERN_ISLANDS; + } + + /// A read of an SGPR by a VMEM instruction requires 5 wait states when the + /// SGPR was written by a VALU Instruction. + bool hasVMEMReadSGPRVALUDefHazard() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasRFEHazards() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. + unsigned getSetRegWaitStates() const { + return getGeneration() <= SEA_ISLANDS ? 1 : 2; + } + + bool dumpCode() const { + return DumpCode; + } + + /// Return the amount of LDS that can be used that will not restrict the + /// occupancy lower than WaveCount. + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; + + bool supportsMinMaxDenormModes() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + /// \returns If target supports S_DENORM_MODE. + bool hasDenormModeInst() const { + return getGeneration() >= AMDGPUSubtarget::GFX10; + } + + bool useFlatForGlobal() const { + return FlatForGlobal; + } + + /// \returns If target supports ds_read/write_b128 and user enables generation + /// of ds_read/write_b128. + bool useDS128() const { + return CIInsts && EnableDS128; + } + + /// \return If target supports ds_read/write_b96/128. + bool hasDS96AndDS128() const { + return CIInsts; + } + + /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 + bool haveRoundOpsF64() const { + return CIInsts; + } + + /// \returns If MUBUF instructions always perform range checking, even for + /// buffer resources used for private memory access. + bool privateMemoryResourceIsRangeChecked() const { + return getGeneration() < AMDGPUSubtarget::GFX9; + } + + /// \returns If target requires PRT Struct NULL support (zero result registers + /// for sparse texture support). 
+ bool usePRTStrictNull() const { + return EnablePRTStrictNull; + } + + bool hasAutoWaitcntBeforeBarrier() const { + return AutoWaitcntBeforeBarrier; + } + + bool hasUnalignedBufferAccess() const { + return UnalignedBufferAccess; + } + + bool hasUnalignedBufferAccessEnabled() const { + return UnalignedBufferAccess && UnalignedAccessMode; + } + + bool hasUnalignedDSAccess() const { + return UnalignedDSAccess; + } + + bool hasUnalignedDSAccessEnabled() const { + return UnalignedDSAccess && UnalignedAccessMode; + } + + bool hasUnalignedScratchAccess() const { + return UnalignedScratchAccess; + } + + bool hasUnalignedAccessMode() const { + return UnalignedAccessMode; + } + + bool hasApertureRegs() const { + return HasApertureRegs; + } + + bool isTrapHandlerEnabled() const { + return TrapHandler; + } + + bool isXNACKEnabled() const { + return EnableXNACK; + } + + bool isCuModeEnabled() const { + return EnableCuMode; + } + + bool hasFlatAddressSpace() const { + return FlatAddressSpace; + } + + bool hasFlatScrRegister() const { + return hasFlatAddressSpace(); + } + + bool hasFlatInstOffsets() const { + return FlatInstOffsets; + } + + bool hasFlatGlobalInsts() const { + return FlatGlobalInsts; + } + + bool hasFlatScratchInsts() const { + return FlatScratchInsts; + } + + // Check if target supports ST addressing mode with FLAT scratch instructions. + // The ST addressing mode means no registers are used, either VGPR or SGPR, + // but only immediate offset is swizzled and added to the FLAT scratch base. 
+ bool hasFlatScratchSTMode() const { + return hasFlatScratchInsts() && hasGFX10_3Insts(); + } + + bool hasScalarFlatScratchInsts() const { + return ScalarFlatScratchInsts; + } + + bool hasGlobalAddTidInsts() const { + return GFX10_BEncoding; + } + + bool hasAtomicCSub() const { + return GFX10_BEncoding; + } + + bool hasMultiDwordFlatScratchAddressing() const { + return getGeneration() >= GFX9; + } + + bool hasFlatSegmentOffsetBug() const { + return HasFlatSegmentOffsetBug; + } + + bool hasFlatLgkmVMemCountInOrder() const { + return getGeneration() > GFX9; + } + + bool hasD16LoadStore() const { + return getGeneration() >= GFX9; + } + + bool d16PreservesUnusedBits() const { + return hasD16LoadStore() && !isSRAMECCEnabled(); + } + + bool hasD16Images() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + /// Return if most LDS instructions have an m0 use that requires m0 to be + /// initialized. + bool ldsRequiresM0Init() const { + return getGeneration() < GFX9; + } + + // True if the hardware rewinds and replays GWS operations if a wave is + // preempted. + // + // If this is false, a GWS operation requires testing if a nack set the + // MEM_VIOL bit, and repeating if so. + bool hasGWSAutoReplay() const { + return getGeneration() >= GFX9; + } + + /// \returns if target has ds_gws_sema_release_all instruction. + bool hasGWSSemaReleaseAll() const { + return CIInsts; + } + + /// \returns true if the target has integer add/sub instructions that do not + /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, + /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier + /// for saturation.
+ bool hasAddNoCarry() const { + return AddNoCarryInsts; + } + + bool hasUnpackedD16VMem() const { + return HasUnpackedD16VMem; + } + + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const Function &F) const { + return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); + } + + bool hasMad64_32() const { + return getGeneration() >= SEA_ISLANDS; + } + + bool hasSDWAOmod() const { + return HasSDWAOmod; + } + + bool hasSDWAScalar() const { + return HasSDWAScalar; + } + + bool hasSDWASdst() const { + return HasSDWASdst; + } + + bool hasSDWAMac() const { + return HasSDWAMac; + } + + bool hasSDWAOutModsVOPC() const { + return HasSDWAOutModsVOPC; + } + + bool hasDLInsts() const { + return HasDLInsts; + } + + bool hasDot1Insts() const { + return HasDot1Insts; + } + + bool hasDot2Insts() const { + return HasDot2Insts; + } + + bool hasDot3Insts() const { + return HasDot3Insts; + } + + bool hasDot4Insts() const { + return HasDot4Insts; + } + + bool hasDot5Insts() const { + return HasDot5Insts; + } + + bool hasDot6Insts() const { + return HasDot6Insts; + } + + bool hasMAIInsts() const { + return HasMAIInsts; + } + + bool hasPkFmacF16Inst() const { + return HasPkFmacF16Inst; + } + + bool hasAtomicFaddInsts() const { + return HasAtomicFaddInsts; + } + + bool isSRAMECCEnabled() const { + return EnableSRAMECC; + } + + bool hasNoSdstCMPX() const { + return HasNoSdstCMPX; + } + + bool hasVscnt() const { + return HasVscnt; + } + + bool hasGetWaveIdInst() const { + return HasGetWaveIdInst; + } + + bool hasSMemTimeInst() const { + return HasSMemTimeInst; + } + + bool hasRegisterBanking() const { + return HasRegisterBanking; + } + + bool hasVOP3Literal() const { + return HasVOP3Literal; + } + + bool hasNoDataDepHazard() const { + return HasNoDataDepHazard; + } + + bool vmemWriteNeedsExpWaitcnt() const { + return getGeneration() < SEA_ISLANDS; + } + + // Scratch is allocated in 256 dword per wave blocks for the entire + // wavefront. 
When viewed from the perspective of an arbitrary workitem, this + // is 4-byte aligned. + // + // Only 4-byte alignment is really needed to access anything. Transformations + // on the pointer value itself may rely on the alignment / known low bits of + // the pointer. Set this to something above the minimum to avoid needing + // dynamic realignment in common cases. + Align getStackAlignment() const { return Align(16); } + + bool enableMachineScheduler() const override { + return true; + } + + bool useAA() const override; + + bool enableSubRegLiveness() const override { + return true; + } + + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } + + // static wrappers + static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); + + // XXX - Why is this here if it isn't in the default pass set? + bool enableEarlyIfConversion() const override { + return true; + } + + bool enableFlatScratch() const; + + void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const override; + + unsigned getMaxNumUserSGPRs() const { + return 16; + } + + bool hasSMemRealTime() const { + return HasSMemRealTime; + } + + bool hasMovrel() const { + return HasMovrel; + } + + bool hasVGPRIndexMode() const { + return HasVGPRIndexMode; + } + + bool useVGPRIndexMode() const; + + bool hasScalarCompareEq64() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasScalarStores() const { + return HasScalarStores; + } + + bool hasScalarAtomics() const { + return HasScalarAtomics; + } + + bool hasLDSFPAtomics() const { + return GFX8Insts; + } + + bool hasDPP() const { + return HasDPP; + } + + bool hasDPPBroadcasts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPPWavefrontShifts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPP8() const { + return HasDPP8; + } + + bool hasR128A16() const { + return HasR128A16; + } + + bool
hasGFX10A16() const { + return HasGFX10A16; + } + + bool hasA16() const { return hasR128A16() || hasGFX10A16(); } + + bool hasG16() const { return HasG16; } + + bool hasOffset3fBug() const { + return HasOffset3fBug; + } + + bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } + + bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } + + bool hasNSAEncoding() const { return HasNSAEncoding; } + + bool hasGFX10_BEncoding() const { + return GFX10_BEncoding; + } + + bool hasGFX10_3Insts() const { + return GFX10_3Insts; + } + + bool hasMadF16() const; + + bool enableSIScheduler() const { + return EnableSIScheduler; + } + + bool loadStoreOptEnabled() const { + return EnableLoadStoreOpt; + } + + bool hasSGPRInitBug() const { + return SGPRInitBug; + } + + bool hasMFMAInlineLiteralBug() const { + return HasMFMAInlineLiteralBug; + } + + bool has12DWordStoreHazard() const { + return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; + } + + // \returns true if the subtarget supports DWORDX3 load/store instructions. 
+ bool hasDwordx3LoadStores() const { + return CIInsts; + } + + bool hasReadM0MovRelInterpHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + + bool hasReadM0SendMsgHazard() const { + return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && + getGeneration() <= AMDGPUSubtarget::GFX9; + } + + bool hasVcmpxPermlaneHazard() const { + return HasVcmpxPermlaneHazard; + } + + bool hasVMEMtoScalarWriteHazard() const { + return HasVMEMtoScalarWriteHazard; + } + + bool hasSMEMtoVectorWriteHazard() const { + return HasSMEMtoVectorWriteHazard; + } + + bool hasLDSMisalignedBug() const { + return LDSMisalignedBug && !EnableCuMode; + } + + bool hasInstFwdPrefetchBug() const { + return HasInstFwdPrefetchBug; + } + + bool hasVcmpxExecWARHazard() const { + return HasVcmpxExecWARHazard; + } + + bool hasLdsBranchVmemWARHazard() const { + return HasLdsBranchVmemWARHazard; + } + + bool hasNSAtoVMEMBug() const { + return HasNSAtoVMEMBug; + } + + bool hasHardClauses() const { return getGeneration() >= GFX10; } + + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs + /// SGPRs + unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; + + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs + /// VGPRs + unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + + /// Return occupancy for the given function. Used LDS and a number of + /// registers if provided. + /// Note, occupancy can be affected by the scratch allocation as well, but + /// we do not have enough information to compute it. + unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, + unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; + + /// \returns true if the flat_scratch register should be initialized with the + /// pointer to the wave's scratch memory rather than a size and offset. 
+ bool flatScratchIsPointer() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + /// \returns true if the machine has merged shaders in which s0-s7 are + /// reserved by the hardware and user SGPRs start at s8 + bool hasMergedShaders() const { + return getGeneration() >= GFX9; + } + + /// \returns SGPR allocation granularity supported by the subtarget. + unsigned getSGPRAllocGranule() const { + return AMDGPU::IsaInfo::getSGPRAllocGranule(this); + } + + /// \returns SGPR encoding granularity supported by the subtarget. + unsigned getSGPREncodingGranule() const { + return AMDGPU::IsaInfo::getSGPREncodingGranule(this); + } + + /// \returns Total number of SGPRs supported by the subtarget. + unsigned getTotalNumSGPRs() const { + return AMDGPU::IsaInfo::getTotalNumSGPRs(this); + } + + /// \returns Addressable number of SGPRs supported by the subtarget. + unsigned getAddressableNumSGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); + } + + /// \returns Minimum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumSGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); + } + + /// \returns Maximum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { + return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); + } + + /// \returns Reserved number of SGPRs for given function \p MF. + unsigned getReservedNumSGPRs(const MachineFunction &MF) const; + + /// \returns Maximum number of SGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of SGPRs explicitly + /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 
+ /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumSGPRs(const MachineFunction &MF) const; + + /// \returns VGPR allocation granularity supported by the subtarget. + unsigned getVGPRAllocGranule() const { + return AMDGPU::IsaInfo::getVGPRAllocGranule(this); + } + + /// \returns VGPR encoding granularity supported by the subtarget. + unsigned getVGPREncodingGranule() const { + return AMDGPU::IsaInfo::getVGPREncodingGranule(this); + } + + /// \returns Total number of VGPRs supported by the subtarget. + unsigned getTotalNumVGPRs() const { + return AMDGPU::IsaInfo::getTotalNumVGPRs(this); + } + + /// \returns Addressable number of VGPRs supported by the subtarget. + unsigned getAddressableNumVGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); + } + + /// \returns Minimum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); + } + + /// \returns Maximum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); + } + + /// \returns Maximum number of VGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of VGPRs explicitly + /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 
+ /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + + void getPostRAMutations( + std::vector> &Mutations) + const override; + + bool isWave32() const { + return getWavefrontSize() == 32; + } + + bool isWave64() const { + return getWavefrontSize() == 64; + } + + const TargetRegisterClass *getBoolRC() const { + return getRegisterInfo()->getBoolRC(); + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. 
+ unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } + + void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, + SDep &Dep) const override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 7f93ccc87c1..71b44a50910 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -31,7 +31,7 @@ class Target; class Triple; class raw_pwrite_stream; -enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 }; +enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 }; MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour); diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp index 9ffc9593a25..a96fc7ef234 100644 --- a/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -15,9 +15,10 @@ //===----------------------------------------------------------------------===// #include "R600AsmPrinter.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp index cb39cc73a66..a19d00b6250 100644 --- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -13,7 +13,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp 
b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 7635cd39cd2..ca1e61393e9 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -13,8 +13,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include using namespace llvm; diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 51f9e5f43b5..664e134889e 100644 --- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -14,8 +14,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 7919e0de476..81dc91ab922 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -14,8 +14,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp index 1ce95a0d416..abd4086db62 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -7,7 +7,7 @@ //==-----------------------------------------------------------------------===// #include "R600FrameLowering.h" -#include "AMDGPUSubtarget.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
b/lib/Target/AMDGPU/R600ISelLowering.cpp index eea69e8e50e..c0120903396 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -13,11 +13,11 @@ #include "R600ISelLowering.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 665f88a8a4a..7a623f3e304 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -13,8 +13,9 @@ #include "R600InstrInfo.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" #include "llvm/ADT/SmallSet.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index ffaa320e031..f85a6870628 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 6751362a411..8f19a3e478e 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -27,8 +27,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" #include 
"llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index 30bc1cf6262..eaac938b098 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index c643a243f8c..e4f7d89bf4c 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -12,8 +12,9 @@ //===----------------------------------------------------------------------===// #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600Subtarget.h b/lib/Target/AMDGPU/R600Subtarget.h new file mode 100644 index 00000000000..07238da18c6 --- /dev/null +++ b/lib/Target/AMDGPU/R600Subtarget.h @@ -0,0 +1,174 @@ +//=====-- R600Subtarget.h - Define Subtarget for AMDGPU R600 ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU R600 specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H + +#include "AMDGPUSubtarget.h" +#include "R600FrameLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class MCInst; +class MCInstrInfo; + +} // namespace llvm + +#define GET_SUBTARGETINFO_HEADER +#include "R600GenSubtargetInfo.inc" + +namespace llvm { + +class R600Subtarget final : public R600GenSubtargetInfo, + public AMDGPUSubtarget { +private: + R600InstrInfo InstrInfo; + R600FrameLowering FrameLowering; + bool FMA; + bool CaymanISA; + bool CFALUBug; + bool HasVertexCache; + bool R600ALUInst; + bool FP64; + short TexVTXClauseSize; + Generation Gen; + R600TargetLowering TLInfo; + InstrItineraryData InstrItins; + SelectionDAGTargetInfo TSInfo; + +public: + R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } + + const R600FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const R600TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const R600RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + // Nothing implemented, just prevent crashes on use. 
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + Generation getGeneration() const { + return Gen; + } + + Align getStackAlignment() const { return Align(4); } + + R600Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + bool hasBFE() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + return false; + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCaymanISA() const { + return CaymanISA; + } + + bool hasFFBL() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFFBH() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFMA() const { return FMA; } + + bool hasCFAluBug() const { return CFALUBug; } + + bool hasVertexCache() const { return HasVertexCache; } + + short getTexVTXClauseSize() const { return TexVTXClauseSize; } + + bool enableMachineScheduler() const override { + return true; + } + + bool enableSubRegLiveness() const override { + return true; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. 
+ unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp index 9de39f6a290..3b753cb66ea 100644 --- a/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -16,7 +16,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #define DEBUG_TYPE "si-img-init" diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index d4942cfecc0..625749deb3a 100644 --- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 6beecafee3b..34f59bf34dd 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,7 +65,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include 
"AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp index ccbbb697fec..f7e3ea5fc07 100644 --- a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 316eadc275e..d22bdb79153 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -9,7 +9,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 21025c41c2f..7bf508a72dc 100644 --- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "GCNRegPressure.h" #include "SIMachineFunctionInfo.h" #include "llvm/InitializePasses.h" diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 65f1ac35396..727864c09ee 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,7 +8,8 @@ #include "SIFrameLowering.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include 
"GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 7d62a3bd6d3..bf4ce10cf9f 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -14,7 +14,6 @@ #include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/lib/Target/AMDGPU/SIInsertHardClauses.cpp index 41a8ca5871f..5611c9c5d57 100644 --- a/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -32,7 +32,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index e80325bddc4..9d31cd5cedc 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 29ffe9dc93a..1646557b7e5 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -24,7 +24,8 @@ //===----------------------------------------------------------------------===// 
#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index b0ca9dd3802..dfd0075bf03 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -14,8 +14,9 @@ #include "SIInstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveVariables.h" diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 8ee7c55acb1..b39420f3c7d 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -58,7 +58,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 1fc942aa4c8..9ba5f8f8e85 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,7 +48,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp 
b/lib/Target/AMDGPU/SILowerI1Copies.cpp index b8f8c768bde..18ab7d7cd55 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -22,7 +22,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePostDominators.h" diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 80fcb099ffa..30405059530 100644 --- a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -16,7 +16,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/RegisterScavenging.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 896590538ee..8b4e1ba9328 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -8,7 +8,6 @@ #include "SIMachineFunctionInfo.h" #include "AMDGPUTargetMachine.h" -#include "AMDGPUSubtarget.h" #define MAX_LANES 64 diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 6f0b2d7c6b4..3caa75e4d95 100644 --- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -15,7 +15,8 @@ #include "AMDGPU.h" #include "AMDGPUMachineModuleInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/DiagnosticInfo.h" diff --git 
a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp index 539e7713e14..3d659eca47d 100644 --- a/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/lib/Target/AMDGPU/SIModeRegister.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 3fe0008649e..54f20912d0a 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 3d832de9f1c..162e96655df 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -13,7 +13,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index cc33071a925..7d7a753bb33 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,7 +20,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include 
"GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIPostRABundler.cpp b/lib/Target/AMDGPU/SIPostRABundler.cpp index a62c2234873..ab05081e55d 100644 --- a/lib/Target/AMDGPU/SIPostRABundler.cpp +++ b/lib/Target/AMDGPU/SIPostRABundler.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 5dc6eaa1fbe..dc08d9dcb9b 100644 --- a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/LiveIntervals.h" diff --git a/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 7daea086020..9ca43512cd9 100644 --- a/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 5e02748ebb9..60e881d6ef1 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -14,12 +14,14 @@ #include 
"SIRegisterInfo.h" #include "AMDGPU.h" #include "AMDGPURegisterBankInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterScavenging.h" + using namespace llvm; #define GET_REGINFO_TARGET_DESC diff --git a/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp index 96a38969419..d30ff4a3fd1 100644 --- a/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ b/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 8bc17040570..2628070f219 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -9,7 +9,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index b1bbd76dae5..0640e24b37e 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -56,7 +56,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include 
"llvm/CodeGen/LiveIntervals.h" diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 99b384e9afd..0fd4d7980f8 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -9,8 +9,9 @@ #include "AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUAsmUtils.h" -#include "AMDGPUSubtarget.h" #include "AMDKernelCodeT.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" diff --git a/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp b/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp index d93fdba9450..c772608b49d 100755 --- a/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp +++ b/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h"