
[AMDGPU] Get address space mapping by target triple environment

As we introduced the target triple environments amdgiz and amdgizcl, the address
space values are no longer enums. We have to decide the values by target triple.

The basic idea is to use struct AMDGPUAS to represent address space values.
For address space values which do not depend on the target triple, use static
const members, so that they don't occupy extra memory space and are equivalent
to compile-time constants.

Since the struct is lightweight and cheap, it can be created on the fly at
the point of usage. Or it can be added as a member to a pass and created at
the beginning of the run* function.
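
For illustration, a minimal sketch of that usage pattern, assuming only struct
AMDGPUAS and the llvm::AMDGPU::getAMDGPUAS overloads declared in this patch (the
pass name and loop body are hypothetical):

#include "AMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Hypothetical pass: caches the triple-dependent mapping once per run.
class ExampleAddrSpacePass : public ModulePass {
  AMDGPUAS AS; // lightweight value struct, cheap to copy
public:
  static char ID;
  ExampleAddrSpacePass() : ModulePass(ID) {}
  bool runOnModule(Module &M) override {
    AS = AMDGPU::getAMDGPUAS(M); // values decided by M's target triple
    for (const GlobalVariable &GV : M.globals()) {
      // LOCAL_ADDRESS is triple-independent, so it is a static const member.
      bool IsLDS = GV.getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
      // FLAT_ADDRESS depends on the triple, so it is read from the instance.
      bool IsFlat = GV.getType()->getAddressSpace() == AS.FLAT_ADDRESS;
      (void)IsLDS; (void)IsFlat;
    }
    return false;
  }
};
char ExampleAddrSpacePass::ID = 0;
} // end anonymous namespace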

Differential Revision: https://reviews.llvm.org/D31284

llvm-svn: 298846
Yaxun Liu 2017-03-27 14:04:01 +00:00
parent a818c43889
commit da52f0e643
39 changed files with 446 additions and 290 deletions

View File

@ -23,6 +23,7 @@ class Pass;
class Target;
class TargetMachine;
class PassRegistry;
class Module;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@ -150,43 +151,53 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
namespace AMDGPUAS {
enum AddressSpaces : unsigned {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
FLAT_ADDRESS = 4, ///< Address space for flat memory.
REGION_ADDRESS = 5, ///< Address space for region memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
struct AMDGPUAS {
// The following address space values depend on the triple environment.
unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
unsigned FLAT_ADDRESS; ///< Address space for flat memory.
unsigned REGION_ADDRESS; ///< Address space for region memory.
// The maximum value for flat, generic, local, private, constant and region.
const static unsigned MAX_COMMON_ADDRESS = 5;
const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0)
const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1)
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
// order to be able to dynamically index a constant buffer, for example:
//
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
CONSTANT_BUFFER_0 = 8,
CONSTANT_BUFFER_1 = 9,
CONSTANT_BUFFER_2 = 10,
CONSTANT_BUFFER_3 = 11,
CONSTANT_BUFFER_4 = 12,
CONSTANT_BUFFER_5 = 13,
CONSTANT_BUFFER_6 = 14,
CONSTANT_BUFFER_7 = 15,
CONSTANT_BUFFER_8 = 16,
CONSTANT_BUFFER_9 = 17,
CONSTANT_BUFFER_10 = 18,
CONSTANT_BUFFER_11 = 19,
CONSTANT_BUFFER_12 = 20,
CONSTANT_BUFFER_13 = 21,
CONSTANT_BUFFER_14 = 22,
CONSTANT_BUFFER_15 = 23,
const static unsigned CONSTANT_BUFFER_0 = 8;
const static unsigned CONSTANT_BUFFER_1 = 9;
const static unsigned CONSTANT_BUFFER_2 = 10;
const static unsigned CONSTANT_BUFFER_3 = 11;
const static unsigned CONSTANT_BUFFER_4 = 12;
const static unsigned CONSTANT_BUFFER_5 = 13;
const static unsigned CONSTANT_BUFFER_6 = 14;
const static unsigned CONSTANT_BUFFER_7 = 15;
const static unsigned CONSTANT_BUFFER_8 = 16;
const static unsigned CONSTANT_BUFFER_9 = 17;
const static unsigned CONSTANT_BUFFER_10 = 18;
const static unsigned CONSTANT_BUFFER_11 = 19;
const static unsigned CONSTANT_BUFFER_12 = 20;
const static unsigned CONSTANT_BUFFER_13 = 21;
const static unsigned CONSTANT_BUFFER_14 = 22;
const static unsigned CONSTANT_BUFFER_15 = 23;
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u
const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
};
} // namespace AMDGPUAS
namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(const Module &M);
AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
AMDGPUAS getAMDGPUAS(Triple T);
} // namespace AMDGPU
} // namespace llvm
#endif

View File

@ -37,26 +37,60 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
// Must match the table in getAliasResult.
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) {
// These arrays are indexed by address space value
// enum elements 0 ... to 5
static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
/* Private Global Constant Group Flat Region*/
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
/* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
/* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
};
static const AliasResult ASAliasRulesGenIsZero[6][6] = {
/* Flat Global Region Group Constant Private */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
/* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias},
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
/* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
};
assert(AS.MAX_COMMON_ADDRESS <= 5);
if (AS.FLAT_ADDRESS == 0) {
assert(AS.GLOBAL_ADDRESS == 1 &&
AS.REGION_ADDRESS == 2 &&
AS.LOCAL_ADDRESS == 3 &&
AS.CONSTANT_ADDRESS == 4 &&
AS.PRIVATE_ADDRESS == 5);
ASAliasRules = &ASAliasRulesGenIsZero;
} else {
assert(AS.PRIVATE_ADDRESS == 0 &&
AS.GLOBAL_ADDRESS == 1 &&
AS.CONSTANT_ADDRESS == 2 &&
AS.LOCAL_ADDRESS == 3 &&
AS.FLAT_ADDRESS == 4 &&
AS.REGION_ADDRESS == 5);
ASAliasRules = &ASAliasRulesPrivIsZero;
}
}
AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
unsigned AS2) const {
if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS)
report_fatal_error("Pointer address space out of range");
return (*ASAliasRules)[AS1][AS2];
}
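// Worked example (illustrative, not part of the patch): for a triple that selects
// the generic-is-zero layout, FLAT_ADDRESS == 0 and the constructor above picks
// ASAliasRulesGenIsZero, so:
//   getAliasResult(AS.GLOBAL_ADDRESS /* 1 */, AS.CONSTANT_ADDRESS /* 4 */) -> NoAlias
//   getAliasResult(AS.FLAT_ADDRESS   /* 0 */, AS.PRIVATE_ADDRESS  /* 5 */) -> MayAlias
// Any address space value above MAX_COMMON_ADDRESS aborts via report_fatal_error.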
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
// This array is indexed by the AMDGPUAS::AddressSpaces
// enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
// see "llvm/Transforms/AMDSPIRUtils.h"
static const AliasResult ASAliasRules[5][5] = {
/* Private Global Constant Group Flat */
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
/* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
/* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
/* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
};
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
report_fatal_error("Pointer address space out of range");
AliasResult Result = ASAliasRules[asA][asB];
AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
if (Result == NoAlias) return Result;
if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) {
@ -75,8 +109,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
if (Base->getType()->getPointerAddressSpace() ==
AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
return true;
}

View File

@ -13,6 +13,7 @@
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@ -25,11 +26,14 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
friend AAResultBase<AMDGPUAAResult>;
const DataLayout &DL;
AMDGPUAS AS;
public:
explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL){}
: AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
ASAliasRules(Arg.ASAliasRules){}
/// Handle invalidation events from the new pass manager.
///
@ -42,6 +46,15 @@ public:
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
bool PathAliases(const MDNode *A, const MDNode *B) const;
class ASAliasRulesTy {
public:
ASAliasRulesTy(AMDGPUAS AS_);
AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
private:
AMDGPUAS AS;
const AliasResult (*ASAliasRules)[6][6];
} ASAliasRules;
};
/// Analysis pass providing a never-invalidated alias analysis result.
@ -53,7 +66,8 @@ public:
typedef AMDGPUAAResult Result;
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
return AMDGPUAAResult(F.getParent()->getDataLayout());
return AMDGPUAAResult(F.getParent()->getDataLayout(),
Triple(F.getParent()->getTargetTriple()));
}
};
@ -72,7 +86,8 @@ public:
const AMDGPUAAResult &getResult() const { return *Result; }
bool doInitialization(Module &M) override {
Result.reset(new AMDGPUAAResult(M.getDataLayout()));
Result.reset(new AMDGPUAAResult(M.getDataLayout(),
Triple(M.getTargetTriple())));
return false;
}
bool doFinalization(Module &M) override {

View File

@ -28,7 +28,8 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
const TargetMachine *TM;
static bool hasAddrSpaceCast(const Function &F);
AMDGPUAS AS;
static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
void addAttrToCallers(Function *Intrin, StringRef AttrName);
bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
@ -48,10 +49,11 @@ public:
ModulePass::getAnalysisUsage(AU);
}
static bool visitConstantExpr(const ConstantExpr *CE);
static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
AMDGPUAS AS);
};
}
@ -65,18 +67,20 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
return castRequiresQueuePtr(ASC->getSrcAddressSpace());
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
const AMDGPUAS &AS) {
return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
AMDGPUAS AS) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
return castRequiresQueuePtr(SrcAS);
return castRequiresQueuePtr(SrcAS, AS);
}
return false;
@ -84,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
AMDGPUAS AS) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@ -97,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
if (visitConstantExpr(CE))
if (visitConstantExpr(CE, AS))
return true;
}
@ -118,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
}
// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
AMDGPUAS AS) {
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
if (castRequiresQueuePtr(ASC))
if (castRequiresQueuePtr(ASC, AS))
return true;
}
@ -133,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
if (!OpC)
continue;
if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
return true;
}
}
@ -173,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
Triple TT(M.getTargetTriple());
AS = AMDGPU::getAMDGPUAS(M);
static const StringRef IntrinsicToAttr[][2] = {
// .x omitted
@ -216,7 +223,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
bool HasApertureRegs =
TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
if (!HasApertureRegs && hasAddrSpaceCast(F))
if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
F.addFnAttr("amdgpu-queue-ptr");
}
}

View File

@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
AMDGPUAS AMDGPUASI;
public:
static char ID;
@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
auto isGlobalLoad = [](LoadInst &Load)->bool {
return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
auto isGlobalLoad = [&](LoadInst &Load)->bool {
return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}

View File

@ -17,6 +17,7 @@
//
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "Utils/AMDGPUBaseInfo.h"
@ -92,7 +93,9 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
: AsmPrinter(TM, std::move(Streamer)) {
AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
}
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
@ -174,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Group segment variables aren't emitted in HSA.
if (AMDGPU::isGroupSegment(GV))
if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
return;
AsmPrinter::EmitGlobalVariable(GV);

View File

@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#include "AMDKernelCodeT.h"
#include "AMDGPU.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
@ -150,6 +151,7 @@ public:
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
AMDGPUAS AMDGPUASI;
};
} // end namespace llvm

View File

@ -31,7 +31,7 @@ using namespace llvm;
#endif
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
: CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@ -49,7 +49,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = *MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
@ -70,7 +70,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = *MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned Align = DL.getABITypeAlignment(ParamTy);

View File

@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
@ -22,6 +23,7 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
AMDGPUAS AMDGPUASI;
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
unsigned Offset) const;

View File

@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
: SelectionDAGISel(TM, OptLevel){
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
}
~AMDGPUDAGToDAGISel() override = default;
bool runOnMachineFunction(MachineFunction &MF) override;
@ -269,7 +272,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@ -586,9 +589,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
@ -1536,7 +1539,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
if (AS == AMDGPUAS::FLAT_ADDRESS) {
if (AS == AMDGPUASI.FLAT_ADDRESS) {
SelectCode(N);
return;
}

View File

@ -59,6 +59,7 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@ -967,19 +968,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
switch (G->getAddressSpace()) {
case AMDGPUAS::LOCAL_ADDRESS: {
if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
if (hasDefinedInitializer(GV))
break;
unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
if (!hasDefinedInitializer(GV)) {
unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
const Function &Fn = *DAG.getMachineFunction().getFunction();

View File

@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#include "AMDGPU.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@ -34,6 +35,7 @@ private:
protected:
const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
@ -224,6 +226,10 @@ public:
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
AMDGPUAS getAMDGPUAS() const {
return AMDGPUASI;
}
};
namespace AMDGPUISD {

View File

@ -30,7 +30,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
: AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
: AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will

View File

@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#include "AMDGPU.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@ -35,6 +36,8 @@ private:
const AMDGPUSubtarget &ST;
virtual void anchor();
protected:
AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);

View File

@ -33,7 +33,7 @@ using namespace llvm;
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI) {}
TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
@ -291,7 +291,7 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
if (!I.hasOneMemOperand())
return false;
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return false;
if (!isInstrUniform(I))

View File

@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@ -35,7 +36,6 @@ public:
const AMDGPURegisterBankInfo &RBI);
bool select(MachineInstr &I) const override;
private:
struct GEPInfo {
const MachineInstr &GEP;
@ -59,6 +59,8 @@ private:
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
protected:
AMDGPUAS AMDGPUASI;
};
} // End llvm namespace.

View File

@ -204,7 +204,7 @@ def COND_NULL : PatLeaf <
//===----------------------------------------------------------------------===//
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;
class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
@ -222,7 +222,7 @@ def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;
class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
// Global address space loads
@ -242,7 +242,7 @@ def global_store_atomic : GlobalStore<atomic_store>;
class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
// Constant address space loads
@ -253,7 +253,7 @@ class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
def constant_load : ConstantLoad<load>;
class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
// Local address space loads
@ -266,7 +266,7 @@ class LocalStore <SDPatternOperator op> : LocalMemOp <
>;
class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS;
return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS;
}]>;
class FlatLoad <SDPatternOperator op> : FlatMemOp <
@ -348,7 +348,7 @@ def local_store_aligned8bytes : Aligned8Bytes <
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
@ -366,7 +366,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
@ -376,7 +376,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i32 &&
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def _64_local : PatFrag<
@ -384,7 +384,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i64 &&
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
}
@ -394,17 +394,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@ -422,22 +422,22 @@ defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
def AMDGPUatomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$value),
(AMDGPUatomic_cmp_swap node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments

View File

@ -72,6 +72,7 @@ private:
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
MDNode *MaxWorkGroupSizeRange = nullptr;
AMDGPUAS AS;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
@ -154,6 +155,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
AS = AMDGPU::getAMDGPUAS(*F.getParent());
FunctionType *FTy = F.getFunctionType();
@ -162,7 +164,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
LocalMemLimit = 0;
DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@ -179,7 +181,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@ -317,7 +319,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@ -413,7 +415,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@ -468,7 +470,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@ -480,7 +482,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::Store: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@ -673,7 +675,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Trying to promote " << I << '\n');
if (tryPromoteAllocaToVector(&I)) {
if (tryPromoteAllocaToVector(&I, AS)) {
DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
return;
}
@ -734,7 +736,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
AMDGPUAS::LOCAL_ADDRESS);
AS.LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@ -767,7 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@ -784,7 +786,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@ -852,7 +854,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
{ Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
{ Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
);
CallInst *NewCall = Builder.CreateCall(

View File

@ -135,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}

View File

@ -157,6 +157,7 @@ protected:
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
AMDGPUAS AS;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
@ -214,6 +215,10 @@ public:
return MaxPrivateElementSize;
}
AMDGPUAS getAMDGPUAS() const {
return AS;
}
bool has16BitInsts() const {
return Has16BitInsts;
}

View File

@ -240,6 +240,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())) {
AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@ -809,3 +810,4 @@ void GCNPassConfig::addPreEmitPass() {
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(this, PM);
}

View File

@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@ -57,17 +58,16 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
AMDGPUAS getAMDGPUAS() const {
return AS;
}
void adjustPassManager(PassManagerBuilder &) override;
/// Get the integer value of a null pointer in the given address space.
uint64_t getNullPointerValue(unsigned AddrSpace) const {
switch(AddrSpace) {
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS:
if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
return -1;
default:
return 0;
}
return 0;
}
};

View File

@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "llvm/MC/MCContext.h"
@ -22,7 +23,8 @@ using namespace llvm;
MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
auto AS = static_cast<const AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) &&
AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
return TextSection;

View File

@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#include "AMDGPU.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"

View File

@ -48,7 +48,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
@ -108,25 +108,24 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
switch (AddrSpace) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
case AMDGPUAS::FLAT_ADDRESS:
AMDGPUAS AS = ST->getAMDGPUAS();
if (AddrSpace == AS.GLOBAL_ADDRESS ||
AddrSpace == AS.CONSTANT_ADDRESS ||
AddrSpace == AS.FLAT_ADDRESS)
return 128;
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS:
if (AddrSpace == AS.LOCAL_ADDRESS ||
AddrSpace == AS.REGION_ADDRESS)
return 64;
case AMDGPUAS::PRIVATE_ADDRESS:
if (AddrSpace == AS.PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
default:
if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
(AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
(AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}
if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
(AddrSpace == AS.PARAM_D_ADDRESS ||
AddrSpace == AS.PARAM_I_ADDRESS ||
(AddrSpace >= AS.CONSTANT_BUFFER_0 &&
AddrSpace <= AS.CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}
bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
@ -135,7 +134,7 @@ bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
@ -362,7 +361,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each

View File

@ -110,7 +110,7 @@ public:
if (IsGraphicsShader)
return -1;
return ST->hasFlatAddressSpace() ?
AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
}
unsigned getVectorSplitCost() { return 0; }

View File

@ -21,8 +21,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset"
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS;
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;

View File

@ -136,7 +136,7 @@ multiclass FLAT_Atomic_Pseudo<
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
@ -284,16 +284,16 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
return AS == AMDGPUAS::FLAT_ADDRESS ||
AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS;
return AS == AMDGPUASI.FLAT_ADDRESS ||
AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
return AS == AMDGPUAS::FLAT_ADDRESS ||
AS == AMDGPUAS::GLOBAL_ADDRESS;
return AS == AMDGPUASI.FLAT_ADDRESS ||
AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
def atomic_flat_load : flat_ld <atomic_load>;

View File

@ -264,20 +264,18 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
unsigned AddressSpace) const {
switch (AddressSpace) {
case AMDGPUAS::PRIVATE_ADDRESS:
if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
return AddressSpaceQualifier::Private;
case AMDGPUAS::GLOBAL_ADDRESS:
if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
return AddressSpaceQualifier::Global;
case AMDGPUAS::CONSTANT_ADDRESS:
if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
return AddressSpaceQualifier::Constant;
case AMDGPUAS::LOCAL_ADDRESS:
if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
return AddressSpaceQualifier::Local;
case AMDGPUAS::FLAT_ADDRESS:
if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
return AddressSpaceQualifier::Generic;
case AMDGPUAS::REGION_ADDRESS:
if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
return AddressSpaceQualifier::Region;
}
llvm_unreachable("Unknown address space qualifier");
}
@ -304,7 +302,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
"image3d_t", ValueKind::Image)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
AMDGPUAS::LOCAL_ADDRESS ?
AMDGPUASI.LOCAL_ADDRESS ?
ValueKind::DynamicSharedPointer :
ValueKind::GlobalBuffer) :
ValueKind::ByValue);
@ -460,7 +458,7 @@ void MetadataStreamer::emitKernelArgs(const Function &Func) {
return;
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
AMDGPUAS::GLOBAL_ADDRESS);
AMDGPUASI.GLOBAL_ADDRESS);
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
}
@ -513,7 +511,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
auto ElTy = PtrTy->getElementType();
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized())
Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
}
@ -576,6 +574,7 @@ void MetadataStreamer::emitKernelDebugProps(
}
void MetadataStreamer::begin(const Module &Mod) {
AMDGPUASI = getAMDGPUAS(Mod);
emitVersion();
emitPrintf(Mod);
}

View File

@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#include "AMDGPU.h"
#include "AMDGPUCodeObjectMetadata.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
@ -36,6 +37,7 @@ namespace CodeObject {
class MetadataStreamer final {
private:
Metadata CodeObjectMetadata;
AMDGPUAS AMDGPUASI;
void dump(StringRef YamlString) const;

View File

@ -557,7 +557,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@ -707,12 +707,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@ -869,7 +869,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_BUFFER_0);
AMDGPUASI.CONSTANT_BUFFER_0);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@ -1107,7 +1107,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@ -1205,9 +1205,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
@ -1230,7 +1231,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@ -1283,7 +1284,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
if (AS != AMDGPUASI.PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@ -1302,39 +1303,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// return (512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) {
switch (AddressSpace) {
case AMDGPUAS::CONSTANT_BUFFER_0:
case AMDGPUASI.CONSTANT_BUFFER_0:
return 512;
case AMDGPUAS::CONSTANT_BUFFER_1:
case AMDGPUASI.CONSTANT_BUFFER_1:
return 512 + 4096;
case AMDGPUAS::CONSTANT_BUFFER_2:
case AMDGPUASI.CONSTANT_BUFFER_2:
return 512 + 4096 * 2;
case AMDGPUAS::CONSTANT_BUFFER_3:
case AMDGPUASI.CONSTANT_BUFFER_3:
return 512 + 4096 * 3;
case AMDGPUAS::CONSTANT_BUFFER_4:
case AMDGPUASI.CONSTANT_BUFFER_4:
return 512 + 4096 * 4;
case AMDGPUAS::CONSTANT_BUFFER_5:
case AMDGPUASI.CONSTANT_BUFFER_5:
return 512 + 4096 * 5;
case AMDGPUAS::CONSTANT_BUFFER_6:
case AMDGPUASI.CONSTANT_BUFFER_6:
return 512 + 4096 * 6;
case AMDGPUAS::CONSTANT_BUFFER_7:
case AMDGPUASI.CONSTANT_BUFFER_7:
return 512 + 4096 * 7;
case AMDGPUAS::CONSTANT_BUFFER_8:
case AMDGPUASI.CONSTANT_BUFFER_8:
return 512 + 4096 * 8;
case AMDGPUAS::CONSTANT_BUFFER_9:
case AMDGPUASI.CONSTANT_BUFFER_9:
return 512 + 4096 * 9;
case AMDGPUAS::CONSTANT_BUFFER_10:
case AMDGPUASI.CONSTANT_BUFFER_10:
return 512 + 4096 * 10;
case AMDGPUAS::CONSTANT_BUFFER_11:
case AMDGPUASI.CONSTANT_BUFFER_11:
return 512 + 4096 * 11;
case AMDGPUAS::CONSTANT_BUFFER_12:
case AMDGPUASI.CONSTANT_BUFFER_12:
return 512 + 4096 * 12;
case AMDGPUAS::CONSTANT_BUFFER_13:
case AMDGPUASI.CONSTANT_BUFFER_13:
return 512 + 4096 * 13;
case AMDGPUAS::CONSTANT_BUFFER_14:
case AMDGPUASI.CONSTANT_BUFFER_14:
return 512 + 4096 * 14;
case AMDGPUAS::CONSTANT_BUFFER_15:
case AMDGPUASI.CONSTANT_BUFFER_15:
return 512 + 4096 * 15;
default:
return -1;
@ -1402,7 +1403,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@ -1412,13 +1413,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(),
AMDGPUASI);
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
@ -1450,7 +1452,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@ -1486,7 +1488,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
return SDValue();
}
@ -1563,7 +1565,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_BUFFER_0);
AMDGPUASI.CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up

View File

@ -316,7 +316,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@ -326,8 +326,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
(LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
(LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@ -339,7 +339,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;

View File

@ -202,6 +202,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
auto AMDGPUASI = ST.getAMDGPUAS();
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
@ -340,7 +341,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PointerType *PtrTy =
PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
AMDGPUAS::CONSTANT_ADDRESS);
AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |

View File

@ -597,8 +597,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
@ -613,8 +612,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
return isLegalMUBUFAddressingMode(AM);
case AMDGPUAS::CONSTANT_ADDRESS:
} else if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@ -652,11 +650,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
case AMDGPUAS::PRIVATE_ADDRESS:
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS:
} else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
AS == AMDGPUASI.REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@ -671,17 +668,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
case AMDGPUAS::FLAT_ADDRESS:
case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
} else if (AS == AMDGPUASI.FLAT_ADDRESS ||
AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
default:
} else {
llvm_unreachable("unhandled address space");
}
}
@ -702,8 +697,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
AddrSpace == AMDGPUASI.REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@ -718,8 +713,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
(AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
(AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
return false;
}
@ -727,7 +722,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
*IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
(Align % 4 == 0) : true;
}
@ -767,15 +762,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
static bool isFlatGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS;
static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.FLAT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@ -789,7 +785,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is a no-op
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@ -850,7 +846,7 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
@ -863,7 +859,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const ISD::InputArg *Arg) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned Align = DL.getABITypeAlignment(Ty);
@ -1073,7 +1069,7 @@ SDValue SITargetLowering::LowerFormalArguments(
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16 bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
@ -2206,13 +2202,13 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@ -2351,7 +2347,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
SelectionDAG &DAG) const {
if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
AMDGPU::SRC_PRIVATE_BASE;
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
}
@ -2367,7 +2363,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
DAG.getConstant(StructOffset, SL, MVT::i64));
@ -2376,7 +2372,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS));
AMDGPUASI.CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
@ -2397,9 +2393,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
@ -2411,9 +2409,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
unsigned SrcAS = ASC->getSrcAddressSpace();
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
@ -2513,8 +2513,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
@ -2565,8 +2565,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
@ -2583,7 +2583,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
@ -3229,21 +3229,20 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS)
if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::CONSTANT_ADDRESS:
if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
LLVM_FALLTHROUGH;
case AMDGPUAS::GLOBAL_ADDRESS:
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
@ -3251,13 +3250,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// have the same legalization requirements as global and private
// loads.
//
LLVM_FALLTHROUGH;
case AMDGPUAS::FLAT_ADDRESS:
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
case AMDGPUAS::PRIVATE_ADDRESS:
}
if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@ -3276,7 +3277,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
case AMDGPUAS::LOCAL_ADDRESS:
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
@ -3285,9 +3286,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, splitting might let us use ds_read_b64.
return SplitVectorLoad(Op, DAG);
default:
return SDValue();
}
return SDValue();
}
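The same rewrite recurs throughout this change: because several address-space values are now runtime members of AMDGPUASI rather than enum constants, each switch (AS) that relied on LLVM_FALLTHROUGH becomes a chain of ifs, and a fall-through is preserved by repeating the earlier condition in the next test. A minimal sketch of the pattern (handleConstantFastPath and handleVectorSplit are hypothetical stand-ins, not code from this patch):

// Before: enum values allowed `switch (AS)` with LLVM_FALLTHROUGH.
// After: runtime values from AMDGPUASI force an if-chain; the fall-through
// from CONSTANT into GLOBAL/FLAT is kept by re-testing CONSTANT below.
if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
  if (SDValue V = handleConstantFastPath())   // hypothetical helper
    return V;
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
    AS == AMDGPUASI.FLAT_ADDRESS) {
  if (SDValue V = handleVectorSplit())        // hypothetical helper
    return V;
}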
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@ -3656,18 +3656,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS)
if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::FLAT_ADDRESS:
if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
case AMDGPUAS::PRIVATE_ADDRESS: {
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@ -3682,8 +3681,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
}
case AMDGPUAS::LOCAL_ADDRESS: {
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
@ -3692,8 +3690,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, splitting might let us use ds_write_b64.
return SplitVectorStore(Op, DAG);
}
default:
} else {
llvm_unreachable("unhandled address space");
}
}
@ -3724,7 +3721,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
if (!isFlatGlobalAddrSpace(AS))
if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
return Op;
// Non-local address space requires custom lowering for atomic compare
@ -3781,26 +3778,26 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
/// the immediate offsets of a memory instruction for the given address space.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
const SISubtarget &STI) {
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
auto AMDGPUASI = STI.getAMDGPUAS();
if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// MUBUF instructions have a 12-bit offset in bytes.
return isUInt<12>(OffsetSize);
case AMDGPUAS::CONSTANT_ADDRESS:
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return isUInt<20>(OffsetSize);
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS:
}
if (AS == AMDGPUASI.LOCAL_ADDRESS ||
AS == AMDGPUASI.REGION_ADDRESS) {
// The single offset versions have a 16-bit offset in bytes.
return isUInt<16>(OffsetSize);
case AMDGPUAS::PRIVATE_ADDRESS:
// Indirect register addressing does not use any offsets.
default:
return false;
}
// Indirect register addressing does not use any offsets.
return false;
}
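For a concrete reading of those limits, consider a 1024-byte immediate offset (an arbitrary example, not taken from the patch); only the SI SMRD encoding rejects it:

#include "llvm/Support/MathExtras.h"   // for llvm::isUInt<N>

// Illustrative only: a 1024-byte offset against the per-space limits above.
//   MUBUF (global) : isUInt<12>(1024)    -> true,  fits the 12-bit byte field
//   SMRD on SI     : isUInt<8>(1024 / 4) -> false, 256 dwords need 9 bits
//   SMRD on VI     : isUInt<20>(1024)    -> true,  fits the 20-bit byte field
//   DS (local)     : isUInt<16>(1024)    -> true,  fits the 16-bit byte field
static_assert(llvm::isUInt<12>(1024) && !llvm::isUInt<8>(1024 / 4) &&
              llvm::isUInt<20>(1024) && llvm::isUInt<16>(1024),
              "sample offset behaves as described above");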
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
@ -3858,7 +3855,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
// TODO: We could also do this for multiplies.
unsigned AS = N->getAddressSpace();
if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());

View File

@ -3747,7 +3747,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
(*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
(*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@ -3854,7 +3854,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
return true;
}
return false;

View File

@ -107,7 +107,7 @@ def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
>;
def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
@ -144,7 +144,7 @@ def SIst_local : SDNode <"ISD::STORE", SDTStore,
def si_st_local : PatFrag <
(ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_store_local : PatFrag <

View File

@ -226,9 +226,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;

View File

@ -19,6 +19,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@ -354,16 +355,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
ELF::SHF_AMDGPU_HSA_AGENT);
}
bool isGroupSegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
bool isGlobalSegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
bool isReadOnlySegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
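A hedged usage sketch for the re-parameterized helpers (the call site is hypothetical; only the helper signatures and getAMDGPUAS come from this patch):

// Sketch: classifying a global with the new signatures; the mapping is
// derived from the target triple of the global's parent module.
AMDGPUAS ASI = llvm::AMDGPU::getAMDGPUAS(*GV->getParent());
if (llvm::AMDGPU::isGroupSegment(GV, ASI)) {
  // LDS object: nothing to emit for it.
} else if (llvm::AMDGPU::isReadOnlySegment(GV, ASI)) {
  // Constant-address global: may land in .rodata or .text.
}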
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@ -736,6 +737,60 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
isUInt<20>(EncodedOffset);
}
} // end namespace AMDGPU
} // end namespace llvm
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(Triple T) {
auto Env = T.getEnvironmentName();
AMDGPUAS AS;
if (Env == "amdgiz" || Env == "amdgizcl") {
AS.FLAT_ADDRESS = 0;
AS.CONSTANT_ADDRESS = 4;
AS.PRIVATE_ADDRESS = 5;
AS.REGION_ADDRESS = 2;
}
else {
AS.FLAT_ADDRESS = 4;
AS.CONSTANT_ADDRESS = 2;
AS.PRIVATE_ADDRESS = 0;
AS.REGION_ADDRESS = 5;
}
return AS;
}
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
return getAMDGPUAS(M.getTargetTriple());
}
AMDGPUAS getAMDGPUAS(const Module &M) {
return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm
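Finally, a minimal sketch of how other code might consume the new mapping (isFlatOrPrivate is a hypothetical helper; only getAMDGPUAS and the struct fields come from this patch). Under the amdgiz/amdgizcl environments FLAT becomes address space 0 and PRIVATE moves to 5, as encoded above:

#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/Module.h"

// Hypothetical helper: classify a pointer's address space through the
// triple-dependent mapping instead of hard-coded enum values.
static bool isFlatOrPrivate(const llvm::Module &M, unsigned AS) {
  AMDGPUAS ASI = llvm::AMDGPU::getAMDGPUAS(M); // mapping from the module triple
  return AS == ASI.FLAT_ADDRESS ||             // 0 for amdgiz/amdgizcl, else 4
         AS == ASI.PRIVATE_ADDRESS;            // 5 for amdgiz/amdgizcl, else 0
}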

View File

@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
@ -160,9 +161,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.