mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
AMDGPU: Remove remnants of old address space mapping
llvm-svn: 341165
This commit is contained in:
parent
301ecc5e4a
commit
14734dd899
@ -221,19 +221,18 @@ enum TargetIndex {
|
||||
/// however on the GPU, each address space points to
|
||||
/// a separate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
struct AMDGPUAS {
|
||||
// The following address space values depend on the triple environment.
|
||||
unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
|
||||
unsigned FLAT_ADDRESS; ///< Address space for flat memory.
|
||||
unsigned REGION_ADDRESS; ///< Address space for region memory.
|
||||
|
||||
namespace AMDGPUAS {
|
||||
enum : unsigned {
|
||||
// The maximum value for flat, generic, local, private, constant and region.
|
||||
MAX_AMDGPU_ADDRESS = 6,
|
||||
|
||||
FLAT_ADDRESS = 0, ///< Address space for flat memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
|
||||
REGION_ADDRESS = 2, ///< Address space for region memory.
|
||||
|
||||
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
|
||||
|
||||
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
|
||||
|
||||
@ -270,12 +269,4 @@ struct AMDGPUAS {
|
||||
};
|
||||
};
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
AMDGPUAS getAMDGPUAS(const Module &M);
|
||||
AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
|
||||
AMDGPUAS getAMDGPUAS(Triple T);
|
||||
} // namespace AMDGPU
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
@ -47,20 +47,10 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
}
|
||||
|
||||
// Must match the table in getAliasResult.
|
||||
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
|
||||
: Arch(Arch_), AS(AS_) {
|
||||
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(Triple::ArchType Arch_)
|
||||
: Arch(Arch_) {
|
||||
// These arrays are indexed by address space value
|
||||
// enum elements 0 ... to 6
|
||||
static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
|
||||
/* Private Global Constant Group Flat Region Constant 32-bit */
|
||||
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
|
||||
/* Global */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
|
||||
/* Constant */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
|
||||
/* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
|
||||
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
|
||||
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
|
||||
/* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
|
||||
};
|
||||
static const AliasResult ASAliasRulesGenIsZero[7][7] = {
|
||||
/* Flat Global Region Group Constant Private Constant 32-bit */
|
||||
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
|
||||
@ -71,30 +61,15 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
|
||||
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
|
||||
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
|
||||
};
|
||||
|
||||
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
|
||||
if (AS.FLAT_ADDRESS == 0) {
|
||||
assert(AS.GLOBAL_ADDRESS == 1 &&
|
||||
AS.REGION_ADDRESS == 2 &&
|
||||
AS.LOCAL_ADDRESS == 3 &&
|
||||
AS.CONSTANT_ADDRESS == 4 &&
|
||||
AS.PRIVATE_ADDRESS == 5 &&
|
||||
AS.CONSTANT_ADDRESS_32BIT == 6);
|
||||
ASAliasRules = &ASAliasRulesGenIsZero;
|
||||
} else {
|
||||
assert(AS.PRIVATE_ADDRESS == 0 &&
|
||||
AS.GLOBAL_ADDRESS == 1 &&
|
||||
AS.CONSTANT_ADDRESS == 2 &&
|
||||
AS.LOCAL_ADDRESS == 3 &&
|
||||
AS.FLAT_ADDRESS == 4 &&
|
||||
AS.REGION_ADDRESS == 5 &&
|
||||
AS.CONSTANT_ADDRESS_32BIT == 6);
|
||||
ASAliasRules = &ASAliasRulesPrivIsZero;
|
||||
}
|
||||
|
||||
ASAliasRules = &ASAliasRulesGenIsZero;
|
||||
}
|
||||
|
||||
AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
|
||||
unsigned AS2) const {
|
||||
if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
|
||||
if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) {
|
||||
if (Arch == Triple::amdgcn)
|
||||
report_fatal_error("Pointer address space out of range");
|
||||
return AS1 == AS2 ? MayAlias : NoAlias;
|
||||
@ -118,9 +93,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
|
||||
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
|
||||
bool OrLocal) {
|
||||
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
|
||||
|
||||
if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
|
||||
Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
|
||||
unsigned AS = Base->getType()->getPointerAddressSpace();
|
||||
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -33,13 +33,12 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
|
||||
friend AAResultBase<AMDGPUAAResult>;
|
||||
|
||||
const DataLayout &DL;
|
||||
AMDGPUAS AS;
|
||||
|
||||
public:
|
||||
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
|
||||
DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
|
||||
DL(DL), ASAliasRules(T.getArch()) {}
|
||||
AMDGPUAAResult(AMDGPUAAResult &&Arg)
|
||||
: AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
|
||||
: AAResultBase(std::move(Arg)), DL(Arg.DL),
|
||||
ASAliasRules(Arg.ASAliasRules){}
|
||||
|
||||
/// Handle invalidation events from the new pass manager.
|
||||
@ -56,13 +55,12 @@ private:
|
||||
|
||||
class ASAliasRulesTy {
|
||||
public:
|
||||
ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
|
||||
ASAliasRulesTy(Triple::ArchType Arch_);
|
||||
|
||||
AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
|
||||
|
||||
private:
|
||||
Triple::ArchType Arch;
|
||||
AMDGPUAS AS;
|
||||
const AliasResult (*ASAliasRules)[7][7];
|
||||
} ASAliasRules;
|
||||
};
|
||||
|
@ -86,8 +86,6 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
|
||||
}
|
||||
|
||||
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
||||
AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
|
||||
|
||||
std::vector<GlobalAlias*> AliasesToRemove;
|
||||
|
||||
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
|
||||
@ -122,7 +120,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
||||
for (GlobalVariable &GV : M.globals()) {
|
||||
// TODO: Region address
|
||||
unsigned AS = GV.getType()->getAddressSpace();
|
||||
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
|
||||
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
|
||||
continue;
|
||||
|
||||
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
|
||||
|
@ -46,7 +46,6 @@ namespace {
|
||||
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
|
||||
private:
|
||||
const TargetMachine *TM = nullptr;
|
||||
AMDGPUAS AS;
|
||||
|
||||
bool addFeatureAttributes(Function &F);
|
||||
|
||||
@ -67,11 +66,10 @@ public:
|
||||
CallGraphSCCPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
|
||||
static bool visitConstantExpr(const ConstantExpr *CE);
|
||||
static bool visitConstantExprsRecursively(
|
||||
const Constant *EntryC,
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
|
||||
AMDGPUAS AS);
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
@ -85,20 +83,18 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
|
||||
|
||||
|
||||
// The queue ptr is only needed when casting to flat, not from it.
|
||||
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
|
||||
return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
|
||||
static bool castRequiresQueuePtr(unsigned SrcAS) {
|
||||
return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
|
||||
}
|
||||
|
||||
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
|
||||
const AMDGPUAS &AS) {
|
||||
return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
|
||||
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
|
||||
return castRequiresQueuePtr(ASC->getSrcAddressSpace());
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
|
||||
AMDGPUAS AS) {
|
||||
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
|
||||
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
|
||||
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
|
||||
return castRequiresQueuePtr(SrcAS, AS);
|
||||
return castRequiresQueuePtr(SrcAS);
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -106,8 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
|
||||
const Constant *EntryC,
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
|
||||
AMDGPUAS AS) {
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
|
||||
|
||||
if (!ConstantExprVisited.insert(EntryC).second)
|
||||
return false;
|
||||
@ -120,7 +115,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
|
||||
|
||||
// Check this constant expression.
|
||||
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
|
||||
if (visitConstantExpr(CE, AS))
|
||||
if (visitConstantExpr(CE))
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -262,7 +257,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
continue;
|
||||
|
||||
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
|
||||
if (castRequiresQueuePtr(ASC, AS)) {
|
||||
if (castRequiresQueuePtr(ASC)) {
|
||||
NeedQueuePtr = true;
|
||||
continue;
|
||||
}
|
||||
@ -273,7 +268,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
if (!OpC)
|
||||
continue;
|
||||
|
||||
if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
|
||||
if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
|
||||
NeedQueuePtr = true;
|
||||
break;
|
||||
}
|
||||
@ -318,7 +313,6 @@ bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
|
||||
if (!TPC)
|
||||
report_fatal_error("TargetMachine is required");
|
||||
|
||||
AS = AMDGPU::getAMDGPUAS(CG.getModule());
|
||||
TM = &TPC->getTM<TargetMachine>();
|
||||
return false;
|
||||
}
|
||||
|
@ -37,7 +37,6 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
|
||||
LoopInfo *LI;
|
||||
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
|
||||
bool isKernelFunc;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
@ -133,7 +132,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
|
||||
if (!DA->isUniform(Ptr))
|
||||
return;
|
||||
auto isGlobalLoad = [&](LoadInst &Load)->bool {
|
||||
return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
||||
};
|
||||
// We're tracking up to the Function boundaries
|
||||
// We cannot go beyond because of FunctionPass restrictions
|
||||
@ -168,7 +167,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(M);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -98,8 +98,7 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
|
||||
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
|
||||
std::unique_ptr<MCStreamer> Streamer)
|
||||
: AsmPrinter(TM, std::move(Streamer)) {
|
||||
AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
|
||||
}
|
||||
}
|
||||
|
||||
StringRef AMDGPUAsmPrinter::getPassName() const {
|
||||
return "AMDGPU Assembly Printer";
|
||||
|
@ -143,7 +143,6 @@ public:
|
||||
protected:
|
||||
mutable std::vector<std::string> DisasmLines, HexLines;
|
||||
mutable size_t DisasmLineMaxLen;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -28,7 +28,7 @@
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
|
||||
: CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
|
||||
: CallLowering(&TLI) {
|
||||
}
|
||||
|
||||
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
|
||||
@ -51,7 +51,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const Function &F = MF.getFunction();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
LLT PtrType = getLLTForType(*PtrTy, DL);
|
||||
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
|
||||
unsigned KernArgSegmentPtr =
|
||||
@ -73,7 +73,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
|
||||
MachineFunction &MF = MIRBuilder.getMF();
|
||||
const Function &F = MF.getFunction();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
|
||||
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
|
||||
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
|
||||
|
@ -23,8 +23,6 @@ namespace llvm {
|
||||
class AMDGPUTargetLowering;
|
||||
|
||||
class AMDGPUCallLowering: public CallLowering {
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
|
||||
uint64_t Offset) const;
|
||||
|
||||
|
@ -63,7 +63,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
|
||||
LegacyDivergenceAnalysis *DA = nullptr;
|
||||
Module *Mod = nullptr;
|
||||
bool HasUnsafeFPMath = false;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
|
||||
/// binary operation \p V.
|
||||
@ -799,8 +798,8 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
|
||||
if (!WidenLoads)
|
||||
return false;
|
||||
|
||||
if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
|
||||
if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
|
||||
canWidenScalarExtLoad(I)) {
|
||||
IRBuilder<> Builder(&I);
|
||||
Builder.SetCurrentDebugLocation(I.getDebugLoc());
|
||||
@ -900,7 +899,6 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
|
||||
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||
DA = &getAnalysis<LegacyDivergenceAnalysis>();
|
||||
HasUnsafeFPMath = hasUnsafeFPMath(F);
|
||||
AMDGPUASI = TM.getAMDGPUAS();
|
||||
|
||||
bool MadeChange = false;
|
||||
|
||||
|
@ -76,17 +76,17 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
|
||||
|
||||
AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
|
||||
unsigned AddressSpace) const {
|
||||
if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return AddressSpaceQualifier::Private;
|
||||
if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::GLOBAL_ADDRESS)
|
||||
return AddressSpaceQualifier::Global;
|
||||
if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::CONSTANT_ADDRESS)
|
||||
return AddressSpaceQualifier::Constant;
|
||||
if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::LOCAL_ADDRESS)
|
||||
return AddressSpaceQualifier::Local;
|
||||
if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::FLAT_ADDRESS)
|
||||
return AddressSpaceQualifier::Generic;
|
||||
if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
|
||||
if (AddressSpace == AMDGPUAS::REGION_ADDRESS)
|
||||
return AddressSpaceQualifier::Region;
|
||||
|
||||
llvm_unreachable("Unknown address space qualifier");
|
||||
@ -114,7 +114,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
|
||||
.Case("queue_t", ValueKind::Queue)
|
||||
.Default(isa<PointerType>(Ty) ?
|
||||
(Ty->getPointerAddressSpace() ==
|
||||
AMDGPUASI.LOCAL_ADDRESS ?
|
||||
AMDGPUAS::LOCAL_ADDRESS ?
|
||||
ValueKind::DynamicSharedPointer :
|
||||
ValueKind::GlobalBuffer) :
|
||||
ValueKind::ByValue);
|
||||
@ -355,7 +355,7 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
|
||||
|
||||
unsigned PointeeAlign = 0;
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
|
||||
if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
|
||||
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
PointeeAlign = Arg.getParamAlignment();
|
||||
if (PointeeAlign == 0)
|
||||
PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
|
||||
@ -422,7 +422,7 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
|
||||
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
|
||||
|
||||
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
|
||||
AMDGPUASI.GLOBAL_ADDRESS);
|
||||
AMDGPUAS::GLOBAL_ADDRESS);
|
||||
|
||||
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
|
||||
// "none" argument.
|
||||
@ -447,7 +447,6 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
|
||||
}
|
||||
|
||||
void MetadataStreamer::begin(const Module &Mod) {
|
||||
AMDGPUASI = getAMDGPUAS(Mod);
|
||||
emitVersion();
|
||||
emitPrintf(Mod);
|
||||
}
|
||||
|
@ -37,7 +37,6 @@ namespace HSAMD {
|
||||
class MetadataStreamer final {
|
||||
private:
|
||||
Metadata HSAMetadata;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
void dump(StringRef HSAMetadataString) const;
|
||||
|
||||
|
@ -72,14 +72,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
|
||||
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
|
||||
// make the right decision when generating code for different targets.
|
||||
const GCNSubtarget *Subtarget;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
bool EnableLateStructurizeCFG;
|
||||
|
||||
public:
|
||||
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
|
||||
: SelectionDAGISel(*TM, OptLevel) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
|
||||
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
|
||||
}
|
||||
~AMDGPUDAGToDAGISel() override = default;
|
||||
@ -222,7 +220,6 @@ protected:
|
||||
|
||||
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
|
||||
const R600Subtarget *Subtarget;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
bool isConstantLoad(const MemSDNode *N, int cbID) const;
|
||||
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
|
||||
@ -230,9 +227,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
|
||||
SDValue& Offset);
|
||||
public:
|
||||
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
|
||||
AMDGPUDAGToDAGISel(TM, OptLevel) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
|
||||
}
|
||||
AMDGPUDAGToDAGISel(TM, OptLevel) {}
|
||||
|
||||
void Select(SDNode *N) override;
|
||||
|
||||
@ -348,7 +343,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
|
||||
}
|
||||
|
||||
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
|
||||
if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
|
||||
if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
|
||||
!Subtarget->ldsRequiresM0Init())
|
||||
return N;
|
||||
|
||||
@ -1725,7 +1720,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
|
||||
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
|
||||
MemSDNode *Mem = cast<MemSDNode>(N);
|
||||
unsigned AS = Mem->getAddressSpace();
|
||||
if (AS == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (AS == AMDGPUAS::FLAT_ADDRESS) {
|
||||
SelectCode(N);
|
||||
return;
|
||||
}
|
||||
@ -2108,10 +2103,10 @@ bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
|
||||
if (!N->readMem())
|
||||
return false;
|
||||
if (CbId == -1)
|
||||
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
|
||||
return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
|
||||
|
||||
return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
|
||||
return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
|
||||
}
|
||||
|
||||
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
|
||||
|
@ -146,7 +146,6 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
|
||||
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
||||
const AMDGPUSubtarget &STI)
|
||||
: TargetLowering(TM), Subtarget(&STI) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
|
||||
// Lower floating point store/load to integer store/load to reduce the number
|
||||
// of patterns in tablegen.
|
||||
setOperationAction(ISD::LOAD, MVT::f32, Promote);
|
||||
@ -725,7 +724,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
|
||||
{
|
||||
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
|
||||
if (L->getMemOperand()->getAddrSpace()
|
||||
== AMDGPUASI.CONSTANT_ADDRESS_32BIT)
|
||||
== AMDGPUAS::CONSTANT_ADDRESS_32BIT)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@ -1193,8 +1192,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
||||
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
|
||||
const GlobalValue *GV = G->getGlobal();
|
||||
|
||||
if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
G->getAddressSpace() == AMDGPUASI.REGION_ADDRESS) {
|
||||
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
|
||||
if (!MFI->isEntryFunction()) {
|
||||
const Function &Fn = DAG.getMachineFunction().getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
|
@ -41,8 +41,6 @@ public:
|
||||
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
|
||||
|
||||
protected:
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// Split a vector store into multiple scalar stores.
|
||||
@ -306,10 +304,6 @@ public:
|
||||
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
|
||||
const ImplicitParameter Param) const;
|
||||
|
||||
AMDGPUAS getAMDGPUAS() const {
|
||||
return AMDGPUASI;
|
||||
}
|
||||
|
||||
MVT getFenceOperandTy(const DataLayout &DL) const override {
|
||||
return MVT::i32;
|
||||
}
|
||||
|
@ -118,8 +118,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
|
||||
if (!Callee)
|
||||
return (unsigned)Thres;
|
||||
|
||||
const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Caller->getParent());
|
||||
|
||||
// If we have a pointer to private array passed into a function
|
||||
// it will not be optimized out, leaving scratch usage.
|
||||
// Increase the inline threshold to allow inlining in this case.
|
||||
@ -128,7 +126,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
|
||||
for (Value *PtrArg : CS.args()) {
|
||||
Type *Ty = PtrArg->getType();
|
||||
if (!Ty->isPointerTy() ||
|
||||
Ty->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
|
||||
Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
|
||||
continue;
|
||||
PtrArg = GetUnderlyingObject(PtrArg, DL);
|
||||
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
|
||||
|
@ -55,7 +55,6 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
|
||||
#define GET_GLOBALISEL_TEMPORARIES_INIT
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
#undef GET_GLOBALISEL_TEMPORARIES_INIT
|
||||
,AMDGPUASI(STI.getAMDGPUAS())
|
||||
{
|
||||
}
|
||||
|
||||
@ -506,8 +505,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
|
||||
if (!I.hasOneMemOperand())
|
||||
return false;
|
||||
|
||||
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
|
||||
(*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
|
||||
if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
(*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
|
||||
return false;
|
||||
|
||||
if (!isInstrUniform(I))
|
||||
|
@ -105,9 +105,6 @@ private:
|
||||
#define GET_GLOBALISEL_TEMPORARIES_DECL
|
||||
#include "AMDGPUGenGlobalISel.inc"
|
||||
#undef GET_GLOBALISEL_TEMPORARIES_DECL
|
||||
|
||||
protected:
|
||||
AMDGPUAS AMDGPUASI;
|
||||
};
|
||||
|
||||
} // End llvm namespace.
|
||||
|
@ -331,37 +331,37 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
|
||||
>;
|
||||
|
||||
class PrivateAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class ConstantAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class LocalAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class GlobalAddress : CodePatPred<[{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class GlobalLoadAddress : CodePatPred<[{
|
||||
auto AS = cast<MemSDNode>(N)->getAddressSpace();
|
||||
return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
|
||||
return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class FlatLoadAddress : CodePatPred<[{
|
||||
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
|
||||
return AS == AMDGPUASI.FLAT_ADDRESS ||
|
||||
AS == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS;
|
||||
return AS == AMDGPUAS::FLAT_ADDRESS ||
|
||||
AS == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class FlatStoreAddress : CodePatPred<[{
|
||||
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
|
||||
return AS == AMDGPUASI.FLAT_ADDRESS ||
|
||||
AS == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
return AS == AMDGPUAS::FLAT_ADDRESS ||
|
||||
AS == AMDGPUAS::GLOBAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
|
||||
@ -483,7 +483,7 @@ def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
|
||||
class local_binary_atomic_op<SDNode atomic_op> :
|
||||
PatFrag<(ops node:$ptr, node:$value),
|
||||
(atomic_op node:$ptr, node:$value), [{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
|
||||
@ -500,14 +500,14 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
|
||||
|
||||
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
|
||||
(AMDGPUstore_mskor node:$val, node:$ptr), [{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
|
||||
(ops node:$ptr, node:$cmp, node:$swap),
|
||||
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
|
||||
AtomicSDNode *AN = cast<AtomicSDNode>(N);
|
||||
return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
|
||||
return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
|
||||
@ -516,17 +516,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
|
||||
def "" : PatFrag<
|
||||
(ops node:$ptr, node:$value),
|
||||
(atomic_op node:$ptr, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
|
||||
|
||||
def _noret : PatFrag<
|
||||
(ops node:$ptr, node:$value),
|
||||
(atomic_op node:$ptr, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
|
||||
|
||||
def _ret : PatFrag<
|
||||
(ops node:$ptr, node:$value),
|
||||
(atomic_op node:$ptr, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
|
||||
}
|
||||
|
||||
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
|
||||
@ -553,12 +553,12 @@ def atomic_cmp_swap_global : PatFrag<
|
||||
def atomic_cmp_swap_global_noret : PatFrag<
|
||||
(ops node:$ptr, node:$cmp, node:$value),
|
||||
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
|
||||
|
||||
def atomic_cmp_swap_global_ret : PatFrag<
|
||||
(ops node:$ptr, node:$cmp, node:$value),
|
||||
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc Pattern Fragments
|
||||
|
@ -32,8 +32,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
|
||||
};
|
||||
|
||||
auto AMDGPUAS = ST.getAMDGPUAS();
|
||||
|
||||
const LLT S1 = LLT::scalar(1);
|
||||
const LLT V2S16 = LLT::vector(2, 16);
|
||||
|
||||
@ -44,8 +42,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
|
||||
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
|
||||
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
|
||||
const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS.FLAT_ADDRESS);
|
||||
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS.PRIVATE_ADDRESS);
|
||||
const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
|
||||
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
||||
const LLT AddrSpaces[] = {
|
||||
GlobalPtr,
|
||||
|
@ -1333,8 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
|
||||
// for OpenCL 2.0 we have only generic implementation of sincos
|
||||
// function.
|
||||
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
|
||||
const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
|
||||
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
|
||||
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
|
||||
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
|
||||
if (!Fsincos) return false;
|
||||
|
||||
@ -1347,7 +1346,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
|
||||
// The allocaInst allocates the memory in private address space. This need
|
||||
// to be bitcasted to point to the address space of cos pointer type.
|
||||
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
|
||||
if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
|
||||
if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
|
||||
P = B.CreateAddrSpaceCast(Alloc, PTy);
|
||||
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
|
||||
|
||||
|
@ -99,8 +99,6 @@ private:
|
||||
|
||||
const DataLayout *DL;
|
||||
|
||||
AMDGPUAS AS;
|
||||
|
||||
const TargetLowering *TLI;
|
||||
|
||||
void visit(const Function &F);
|
||||
@ -267,7 +265,6 @@ void AMDGPUPerfHint::runOnFunction(Function &F) {
|
||||
|
||||
const Module &M = *F.getParent();
|
||||
DL = &M.getDataLayout();
|
||||
AS = AMDGPU::getAMDGPUAS(M);
|
||||
|
||||
visit(F);
|
||||
auto Loc = FIM.find(&F);
|
||||
@ -306,14 +303,14 @@ bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
|
||||
if (auto PT = dyn_cast<PointerType>(V->getType())) {
|
||||
unsigned As = PT->getAddressSpace();
|
||||
// Flat likely points to global too.
|
||||
return As == AS.GLOBAL_ADDRESS || As == AS.FLAT_ADDRESS;
|
||||
return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
|
||||
if (auto PT = dyn_cast<PointerType>(V->getType()))
|
||||
return PT->getAddressSpace() == AS.LOCAL_ADDRESS;
|
||||
return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -346,7 +343,8 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
|
||||
bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
|
||||
if (auto PT = dyn_cast<PointerType>(V->getType())) {
|
||||
unsigned As = PT->getAddressSpace();
|
||||
return As == AS.CONSTANT_ADDRESS || As == AS.CONSTANT_ADDRESS_32BIT;
|
||||
return As == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -76,7 +76,6 @@ private:
|
||||
const TargetMachine *TM;
|
||||
Module *Mod = nullptr;
|
||||
const DataLayout *DL = nullptr;
|
||||
AMDGPUAS AS;
|
||||
|
||||
// FIXME: This should be per-kernel.
|
||||
uint32_t LocalMemLimit = 0;
|
||||
@ -156,8 +155,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
|
||||
if (!ST.isPromoteAllocaEnabled())
|
||||
return false;
|
||||
|
||||
AS = AMDGPU::getAMDGPUAS(*F.getParent());
|
||||
|
||||
bool SufficientLDS = hasSufficientLocalMem(F);
|
||||
bool Changed = false;
|
||||
BasicBlock &EntryBB = *F.begin();
|
||||
@ -238,7 +235,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
|
||||
|
||||
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
|
||||
Value *CastDispatchPtr = Builder.CreateBitCast(
|
||||
DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
|
||||
DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
|
||||
|
||||
// We could do a single 64-bit load here, but it's likely that the basic
|
||||
// 32-bit and extract sequence is already present, and it is probably easier
|
||||
@ -342,7 +339,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
|
||||
}
|
||||
}
|
||||
|
||||
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
|
||||
static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
|
||||
if (DisablePromoteAllocaToVector) {
|
||||
LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
|
||||
@ -406,7 +403,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
|
||||
IRBuilder<> Builder(Inst);
|
||||
switch (Inst->getOpcode()) {
|
||||
case Instruction::Load: {
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
|
||||
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
|
||||
|
||||
@ -418,7 +415,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
|
||||
break;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
||||
StoreInst *SI = cast<StoreInst>(Inst);
|
||||
Value *Ptr = SI->getPointerOperand();
|
||||
@ -610,7 +607,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
|
||||
// we cannot use local memory in the pass.
|
||||
for (Type *ParamTy : FTy->params()) {
|
||||
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
|
||||
if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
|
||||
if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
LocalMemLimit = 0;
|
||||
LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
|
||||
"local memory disabled.\n");
|
||||
@ -627,7 +624,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
|
||||
// Check how much local memory is being used by global objects
|
||||
CurrentLocalMemUsage = 0;
|
||||
for (GlobalVariable &GV : Mod->globals()) {
|
||||
if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
|
||||
if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
|
||||
continue;
|
||||
|
||||
for (const User *U : GV.users()) {
|
||||
@ -706,7 +703,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
|
||||
|
||||
if (tryPromoteAllocaToVector(&I, AS))
|
||||
if (tryPromoteAllocaToVector(&I))
|
||||
return true; // Promoted to vector.
|
||||
|
||||
const Function &ContainingFunction = *I.getParent()->getParent();
|
||||
@ -775,7 +772,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
Twine(F->getName()) + Twine('.') + I.getName(),
|
||||
nullptr,
|
||||
GlobalVariable::NotThreadLocal,
|
||||
AS.LOCAL_ADDRESS);
|
||||
AMDGPUAS::LOCAL_ADDRESS);
|
||||
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
GV->setAlignment(I.getAlignment());
|
||||
|
||||
@ -808,7 +805,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
|
||||
Value *Src0 = CI->getOperand(0);
|
||||
Type *EltTy = Src0->getType()->getPointerElementType();
|
||||
PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
|
||||
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
|
||||
|
||||
if (isa<ConstantPointerNull>(CI->getOperand(0)))
|
||||
CI->setOperand(0, ConstantPointerNull::get(NewTy));
|
||||
@ -825,7 +822,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
continue;
|
||||
|
||||
Type *EltTy = V->getType()->getPointerElementType();
|
||||
PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
|
||||
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
|
||||
|
||||
// FIXME: It doesn't really make sense to try to do this for all
|
||||
// instructions.
|
||||
@ -894,7 +891,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
|
||||
Type *SrcTy = Src->getType()->getPointerElementType();
|
||||
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
|
||||
Intrinsic::objectsize,
|
||||
{ Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
|
||||
{ Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
|
||||
);
|
||||
|
||||
CallInst *NewCall = Builder.CreateCall(
|
||||
|
@ -213,7 +213,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
|
||||
TLInfo(TM, *this),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
|
||||
AS = AMDGPU::getAMDGPUAS(TT);
|
||||
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
|
||||
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
|
||||
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
|
||||
@ -462,8 +461,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
TexVTXClauseSize(0),
|
||||
Gen(R600),
|
||||
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
|
||||
InstrItins(getInstrItineraryForCPU(GPU)),
|
||||
AS (AMDGPU::getAMDGPUAS(TT)) { }
|
||||
InstrItins(getInstrItineraryForCPU(GPU)) { }
|
||||
|
||||
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
unsigned NumRegionInstrs) const {
|
||||
|
@ -378,7 +378,6 @@ protected:
|
||||
bool FeatureDisable;
|
||||
|
||||
SelectionDAGTargetInfo TSInfo;
|
||||
AMDGPUAS AS;
|
||||
private:
|
||||
SIInstrInfo InstrInfo;
|
||||
SITargetLowering TLInfo;
|
||||
@ -447,10 +446,6 @@ public:
|
||||
return MaxPrivateElementSize;
|
||||
}
|
||||
|
||||
AMDGPUAS getAMDGPUAS() const {
|
||||
return AS;
|
||||
}
|
||||
|
||||
bool hasIntClamp() const {
|
||||
return HasIntClamp;
|
||||
}
|
||||
@ -975,7 +970,6 @@ private:
|
||||
R600TargetLowering TLInfo;
|
||||
InstrItineraryData InstrItins;
|
||||
SelectionDAGTargetInfo TSInfo;
|
||||
AMDGPUAS AS;
|
||||
|
||||
public:
|
||||
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
|
||||
@ -1060,8 +1054,6 @@ public:
|
||||
|
||||
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
|
||||
|
||||
AMDGPUAS getAMDGPUAS() const { return AS; }
|
||||
|
||||
bool enableMachineScheduler() const override {
|
||||
return true;
|
||||
}
|
||||
|
@ -308,7 +308,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
|
||||
FS, Options, getEffectiveRelocModel(RM),
|
||||
getEffectiveCodeModel(CM), OptLevel),
|
||||
TLOF(createTLOF(getTargetTriple())) {
|
||||
AS = AMDGPU::getAMDGPUAS(TT);
|
||||
initAsmInfo();
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,6 @@ namespace llvm {
|
||||
class AMDGPUTargetMachine : public LLVMTargetMachine {
|
||||
protected:
|
||||
std::unique_ptr<TargetLoweringObjectFile> TLOF;
|
||||
AMDGPUAS AS;
|
||||
|
||||
StringRef getGPUName(const Function &F) const;
|
||||
StringRef getFeatureString(const Function &F) const;
|
||||
@ -55,16 +54,13 @@ public:
|
||||
TargetLoweringObjectFile *getObjFileLowering() const override {
|
||||
return TLOF.get();
|
||||
}
|
||||
AMDGPUAS getAMDGPUAS() const {
|
||||
return AS;
|
||||
}
|
||||
|
||||
void adjustPassManager(PassManagerBuilder &) override;
|
||||
|
||||
/// Get the integer value of a null pointer in the given address space.
|
||||
uint64_t getNullPointerValue(unsigned AddrSpace) const {
|
||||
if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
|
||||
return -1;
|
||||
return 0;
|
||||
return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -102,7 +102,6 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
unsigned ThresholdPrivate = UnrollThresholdPrivate;
|
||||
unsigned ThresholdLocal = UnrollThresholdLocal;
|
||||
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
|
||||
const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
|
||||
for (const BasicBlock *BB : L->getBlocks()) {
|
||||
const DataLayout &DL = BB->getModule()->getDataLayout();
|
||||
unsigned LocalGEPsSeen = 0;
|
||||
@ -140,9 +139,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
|
||||
unsigned AS = GEP->getAddressSpace();
|
||||
unsigned Threshold = 0;
|
||||
if (AS == ASST.PRIVATE_ADDRESS)
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
Threshold = ThresholdPrivate;
|
||||
else if (AS == ASST.LOCAL_ADDRESS)
|
||||
else if (AS == AMDGPUAS::LOCAL_ADDRESS)
|
||||
Threshold = ThresholdLocal;
|
||||
else
|
||||
continue;
|
||||
@ -150,7 +149,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
if (UP.Threshold >= Threshold)
|
||||
continue;
|
||||
|
||||
if (AS == ASST.PRIVATE_ADDRESS) {
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
const Value *Ptr = GEP->getPointerOperand();
|
||||
const AllocaInst *Alloca =
|
||||
dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
|
||||
@ -160,7 +159,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
|
||||
if (AllocaSize > MaxAlloca)
|
||||
continue;
|
||||
} else if (AS == ASST.LOCAL_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
LocalGEPsSeen++;
|
||||
// Inhibit unroll for local memory if we have seen addressing not to
|
||||
// a variable, most likely we will be unable to combine it.
|
||||
@ -253,19 +252,18 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
|
||||
}
|
||||
|
||||
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||
AMDGPUAS AS = ST->getAMDGPUAS();
|
||||
if (AddrSpace == AS.GLOBAL_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
|
||||
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
||||
return 512;
|
||||
}
|
||||
|
||||
if (AddrSpace == AS.FLAT_ADDRESS ||
|
||||
AddrSpace == AS.LOCAL_ADDRESS ||
|
||||
AddrSpace == AS.REGION_ADDRESS)
|
||||
if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS)
|
||||
return 128;
|
||||
|
||||
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
||||
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return 8 * ST->getMaxPrivateElementSize();
|
||||
|
||||
llvm_unreachable("unhandled address space");
|
||||
@ -277,7 +275,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
||||
// We allow vectorization of flat stores, even though we may need to decompose
|
||||
// them later if they may access private memory. We don't have enough context
|
||||
// here, and legalization can handle it.
|
||||
if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
|
||||
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
|
||||
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
|
||||
}
|
||||
@ -552,9 +550,8 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
|
||||
// All other loads are not divergent, because if threads issue loads with the
|
||||
// same arguments, they will always get the same result.
|
||||
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
|
||||
return Load->getPointerAddressSpace() ==
|
||||
ST->getAMDGPUAS().PRIVATE_ADDRESS ||
|
||||
Load->getPointerAddressSpace() == ST->getAMDGPUAS().FLAT_ADDRESS;
|
||||
return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
|
||||
Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
|
||||
|
||||
// Atomics are divergent because they are executed sequentially: when an
|
||||
// atomic operation refers to the same address in each thread, then each
|
||||
@ -644,20 +641,19 @@ unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
|
||||
}
|
||||
|
||||
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||
AMDGPUAS AS = ST->getAMDGPUAS();
|
||||
if (AddrSpace == AS.GLOBAL_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS)
|
||||
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
|
||||
return 128;
|
||||
if (AddrSpace == AS.LOCAL_ADDRESS ||
|
||||
AddrSpace == AS.REGION_ADDRESS)
|
||||
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS)
|
||||
return 64;
|
||||
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
||||
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return 32;
|
||||
|
||||
if ((AddrSpace == AS.PARAM_D_ADDRESS ||
|
||||
AddrSpace == AS.PARAM_I_ADDRESS ||
|
||||
(AddrSpace >= AS.CONSTANT_BUFFER_0 &&
|
||||
AddrSpace <= AS.CONSTANT_BUFFER_15)))
|
||||
if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
|
||||
(AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
|
||||
AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
|
||||
return 128;
|
||||
llvm_unreachable("unhandled address space");
|
||||
}
|
||||
@ -668,9 +664,7 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
||||
// We allow vectorization of flat stores, even though we may need to decompose
|
||||
// them later if they may access private memory. We don't have enough context
|
||||
// here, and legalization can handle it.
|
||||
if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
|
||||
return false;
|
||||
return true;
|
||||
return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
|
||||
}
|
||||
|
||||
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
|
||||
|
@ -179,7 +179,7 @@ public:
|
||||
if (IsGraphicsShader)
|
||||
return -1;
|
||||
return ST->hasFlatAddressSpace() ?
|
||||
ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
|
||||
AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
|
||||
}
|
||||
|
||||
unsigned getVectorSplitCost() { return 0; }
|
||||
|
@ -23,8 +23,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset"
|
||||
class MubufLoad <SDPatternOperator op> : PatFrag <
|
||||
(ops node:$ptr), (op node:$ptr), [{
|
||||
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
|
||||
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS;
|
||||
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}]>;
|
||||
|
||||
def mubuf_load : MubufLoad <load>;
|
||||
|
@ -326,7 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo<
|
||||
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
|
||||
(ops node:$ptr, node:$value),
|
||||
(atomic_op node:$ptr, node:$value),
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
|
||||
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
|
||||
>;
|
||||
|
||||
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
|
||||
|
@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
}
|
||||
|
||||
case Intrinsic::r600_implicitarg_ptr: {
|
||||
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
|
||||
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
|
||||
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
|
||||
return DAG.getConstant(ByteOffset, DL, PtrVT);
|
||||
}
|
||||
@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
|
||||
if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
|
||||
if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
|
||||
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
|
||||
|
||||
const DataLayout &DL = DAG.getDataLayout();
|
||||
const GlobalValue *GV = GSD->getGlobal();
|
||||
MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
|
||||
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
|
||||
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
|
||||
@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
||||
unsigned DwordOffset) const {
|
||||
unsigned ByteOffset = DwordOffset * 4;
|
||||
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||
AMDGPUASI.PARAM_I_ADDRESS);
|
||||
AMDGPUAS::PARAM_I_ADDRESS);
|
||||
|
||||
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
|
||||
assert(isInt<16>(ByteOffset));
|
||||
@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
||||
//TODO: Who creates the i8 stores?
|
||||
assert(Store->isTruncatingStore()
|
||||
|| Store->getValue().getValueType() == MVT::i8);
|
||||
assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
|
||||
assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
||||
SDValue Mask;
|
||||
if (Store->getMemoryVT() == MVT::i8) {
|
||||
@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
||||
// Load dword
|
||||
// TODO: can we be smarter about machine pointer info?
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(
|
||||
Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
|
||||
Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
|
||||
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
|
||||
|
||||
Chain = Dst.getValue(1);
|
||||
@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
|
||||
// Neither LOCAL nor PRIVATE can do vectors at the moment
|
||||
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
|
||||
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
VT.isVector()) {
|
||||
if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
|
||||
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
StoreNode->isTruncatingStore()) {
|
||||
// Add an extra level of chain to isolate this vector
|
||||
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
|
||||
@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
|
||||
DAG.getConstant(2, DL, PtrVT));
|
||||
|
||||
if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
|
||||
// It is beneficial to create MSKOR here instead of combiner to avoid
|
||||
// artificial dependencies introduced by RMW
|
||||
if (StoreNode->isTruncatingStore()) {
|
||||
@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
|
||||
if (AS != AMDGPUASI.PRIVATE_ADDRESS)
|
||||
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return SDValue();
|
||||
|
||||
if (MemVT.bitsLT(MVT::i32))
|
||||
@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
|
||||
// Load dword
|
||||
// TODO: can we be smarter about machine pointer info?
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(
|
||||
Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
|
||||
Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
|
||||
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
|
||||
|
||||
// Get offset within the register.
|
||||
@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT MemVT = LoadNode->getMemoryVT();
|
||||
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
|
||||
|
||||
if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
|
||||
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
|
||||
return lowerPrivateExtLoad(Op, DAG);
|
||||
}
|
||||
@ -1451,8 +1451,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Chain = LoadNode->getChain();
|
||||
SDValue Ptr = LoadNode->getBasePtr();
|
||||
|
||||
if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
|
||||
if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
VT.isVector()) {
|
||||
return scalarizeVectorLoad(LoadNode, DAG);
|
||||
}
|
||||
@ -1473,7 +1473,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
|
||||
DAG.getConstant(4, DL, MVT::i32)),
|
||||
DAG.getConstant(LoadNode->getAddressSpace() -
|
||||
AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
|
||||
AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
|
||||
);
|
||||
}
|
||||
|
||||
@ -1509,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
return DAG.getMergeValues(MergedValues, DL);
|
||||
}
|
||||
|
||||
if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -1606,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
}
|
||||
|
||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||
AMDGPUASI.PARAM_I_ADDRESS);
|
||||
AMDGPUAS::PARAM_I_ADDRESS);
|
||||
|
||||
// i64 isn't a legal type, so the register type used ends up as i32, which
|
||||
// isn't expected here. It attempts to create this sextload, but it ends up
|
||||
@ -1656,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
|
||||
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
|
||||
const SelectionDAG &DAG) const {
|
||||
// Local and Private addresses do not handle vectors. Limit to i32
|
||||
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
|
||||
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
|
||||
return (MemVT.getSizeInBits() <= 32);
|
||||
}
|
||||
return true;
|
||||
|
@ -1504,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
|
||||
switch (Kind) {
|
||||
case PseudoSourceValue::Stack:
|
||||
case PseudoSourceValue::FixedStack:
|
||||
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
|
||||
return AMDGPUAS::PRIVATE_ADDRESS;
|
||||
case PseudoSourceValue::ConstantPool:
|
||||
case PseudoSourceValue::GOT:
|
||||
case PseudoSourceValue::JumpTable:
|
||||
case PseudoSourceValue::GlobalValueCallEntry:
|
||||
case PseudoSourceValue::ExternalSymbolCallEntry:
|
||||
case PseudoSourceValue::TargetCustom:
|
||||
return ST.getAMDGPUAS().CONSTANT_ADDRESS;
|
||||
return AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}
|
||||
|
||||
llvm_unreachable("Invalid pseudo source kind");
|
||||
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
|
||||
}
|
||||
|
@ -299,7 +299,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
|
||||
class LoadParamFrag <PatFrag load_type> : PatFrag <
|
||||
(ops node:$ptr), (load_type node:$ptr),
|
||||
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
|
||||
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
|
||||
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
|
||||
>;
|
||||
|
||||
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
|
||||
@ -309,8 +309,8 @@ def vtx_id3_load : LoadParamFrag<load>;
|
||||
class LoadVtxId1 <PatFrag load> : PatFrag <
|
||||
(ops node:$ptr), (load node:$ptr), [{
|
||||
const MemSDNode *LD = cast<MemSDNode>(N);
|
||||
return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
(LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
|
||||
return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
(LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
!isa<GlobalValue>(GetUnderlyingObject(
|
||||
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
|
||||
}]>;
|
||||
@ -322,7 +322,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
|
||||
class LoadVtxId2 <PatFrag load> : PatFrag <
|
||||
(ops node:$ptr), (load node:$ptr), [{
|
||||
const MemSDNode *LD = cast<MemSDNode>(N);
|
||||
return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
|
||||
return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
isa<GlobalValue>(GetUnderlyingObject(
|
||||
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
|
||||
}]>;
|
||||
|
@ -950,11 +950,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
if (AM.BaseGV)
|
||||
return false;
|
||||
|
||||
if (AS == AMDGPUASI.GLOBAL_ADDRESS)
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS)
|
||||
return isLegalGlobalAddressingMode(AM);
|
||||
|
||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
|
||||
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
||||
// If the offset isn't a multiple of 4, it probably isn't going to be
|
||||
// correctly aligned.
|
||||
// FIXME: Can we get the real alignment here?
|
||||
@ -992,10 +992,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
|
||||
return false;
|
||||
|
||||
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
return isLegalMUBUFAddressingMode(AM);
|
||||
} else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
AS == AMDGPUASI.REGION_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AS == AMDGPUAS::REGION_ADDRESS) {
|
||||
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
|
||||
// field.
|
||||
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
|
||||
@ -1010,8 +1010,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
return true;
|
||||
|
||||
return false;
|
||||
} else if (AS == AMDGPUASI.FLAT_ADDRESS ||
|
||||
AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
|
||||
} else if (AS == AMDGPUAS::FLAT_ADDRESS ||
|
||||
AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
|
||||
// For an unknown address space, this usually means that this is for some
|
||||
// reason being used for pure arithmetic, and not based on some addressing
|
||||
// computation. We don't have instructions that compute pointers with any
|
||||
@ -1025,12 +1025,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
|
||||
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
|
||||
const SelectionDAG &DAG) const {
|
||||
if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
|
||||
return (MemVT.getSizeInBits() <= 4 * 32);
|
||||
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
|
||||
return (MemVT.getSizeInBits() <= MaxPrivateBits);
|
||||
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
return (MemVT.getSizeInBits() <= 2 * 32);
|
||||
}
|
||||
return true;
|
||||
@ -1052,8 +1052,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUASI.REGION_ADDRESS) {
|
||||
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
|
||||
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
|
||||
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
|
||||
// with adjacent offsets.
|
||||
@ -1068,8 +1068,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
// will access scratch. If we had access to the IR function, then we
|
||||
// could determine if any private memory was used in the function.
|
||||
if (!Subtarget->hasUnalignedScratchAccess() &&
|
||||
(AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
|
||||
AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
|
||||
(AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1077,8 +1077,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
// If we have a uniform constant load, it still requires using a slow
|
||||
// buffer instruction if unaligned.
|
||||
if (IsFast) {
|
||||
*IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
|
||||
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
|
||||
(Align % 4 == 0) : true;
|
||||
}
|
||||
|
||||
@ -1118,17 +1118,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
|
||||
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUASI.FLAT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
|
||||
static bool isFlatGlobalAddrSpace(unsigned AS) {
|
||||
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUAS::FLAT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
|
||||
}
|
||||
|
||||
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
|
||||
unsigned DestAS) const {
|
||||
return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
|
||||
isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
|
||||
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
|
||||
}
|
||||
|
||||
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
|
||||
@ -1142,7 +1141,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
|
||||
unsigned DestAS) const {
|
||||
// Flat -> private/local is a simple truncate.
|
||||
// Flat -> global is no-op
|
||||
if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
|
||||
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
|
||||
return true;
|
||||
|
||||
return isNoopAddrSpaceCast(SrcAS, DestAS);
|
||||
@ -1209,7 +1208,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
||||
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
|
||||
|
||||
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
|
||||
MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
|
||||
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
|
||||
|
||||
@ -1249,7 +1248,7 @@ SDValue SITargetLowering::lowerKernargMemParameter(
|
||||
uint64_t Offset, unsigned Align, bool Signed,
|
||||
const ISD::InputArg *Arg) const {
|
||||
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
|
||||
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
|
||||
|
||||
// Try to avoid using an extload by loading earlier than the argument address,
|
||||
@ -2567,7 +2566,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
/*isVol = */ false, /*AlwaysInline = */ true,
|
||||
/*isTailCall = */ false, DstInfo,
|
||||
MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy(
|
||||
*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))));
|
||||
*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))));
|
||||
|
||||
MemOpChains.push_back(Cpy);
|
||||
} else {
|
||||
@ -3911,15 +3910,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
|
||||
|
||||
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
|
||||
const Triple &TT = getTargetMachine().getTargetTriple();
|
||||
return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
|
||||
return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
|
||||
AMDGPU::shouldEmitConstantsToTextSection(TT);
|
||||
}
|
||||
|
||||
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
|
||||
return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
|
||||
return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
|
||||
!shouldEmitFixup(GV) &&
|
||||
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
|
||||
}
|
||||
@ -4107,10 +4106,10 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
SelectionDAG &DAG) const {
|
||||
// FIXME: Use inline constants (src_{shared, private}_base) instead.
|
||||
if (Subtarget->hasApertureRegs()) {
|
||||
unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
|
||||
unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
|
||||
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
|
||||
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
|
||||
unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
|
||||
unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
|
||||
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
|
||||
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
|
||||
unsigned Encoding =
|
||||
@ -4135,7 +4134,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
|
||||
// Offset into amd_queue_t for group_segment_aperture_base_hi /
|
||||
// private_segment_aperture_base_hi.
|
||||
uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
|
||||
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
|
||||
|
||||
SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset);
|
||||
|
||||
@ -4143,7 +4142,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
// TODO: We should use the value from the IR intrinsic call, but it might not
|
||||
// be available and how do we get it?
|
||||
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
|
||||
AMDGPUASI.CONSTANT_ADDRESS));
|
||||
AMDGPUAS::CONSTANT_ADDRESS));
|
||||
|
||||
MachinePointerInfo PtrInfo(V, StructOffset);
|
||||
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
|
||||
@ -4164,11 +4163,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
|
||||
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
|
||||
|
||||
// flat -> local/private
|
||||
if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
|
||||
unsigned DestAS = ASC->getDestAddressSpace();
|
||||
|
||||
if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
unsigned NullVal = TM.getNullPointerValue(DestAS);
|
||||
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
|
||||
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
|
||||
@ -4180,11 +4179,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
|
||||
}
|
||||
|
||||
// local/private -> flat
|
||||
if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
|
||||
unsigned SrcAS = ASC->getSrcAddressSpace();
|
||||
|
||||
if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
|
||||
SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
unsigned NullVal = TM.getNullPointerValue(SrcAS);
|
||||
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
|
||||
|
||||
@ -4381,9 +4380,9 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
|
||||
bool
|
||||
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
||||
// We can fold offsets for anything that doesn't require a GOT relocation.
|
||||
return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
|
||||
return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
|
||||
!shouldEmitGOTReloc(GA->getGlobal());
|
||||
}
|
||||
|
||||
@ -4435,9 +4434,9 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
|
||||
const GlobalValue *GV = GSD->getGlobal();
|
||||
|
||||
if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
|
||||
GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
|
||||
GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
|
||||
if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
|
||||
GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
|
||||
// FIXME: It isn't correct to rely on the type of the pointer. This should
|
||||
// be removed when address space 0 is 64-bit.
|
||||
!GV->getType()->getElementType()->isFunctionTy())
|
||||
@ -4456,7 +4455,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
SIInstrInfo::MO_GOTPCREL32);
|
||||
|
||||
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
|
||||
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
|
||||
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
const DataLayout &DataLayout = DAG.getDataLayout();
|
||||
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
|
||||
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
|
||||
@ -4699,8 +4698,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
|
||||
// in 1D, derivatives dx/dh and dx/dv are packed with undef.
|
||||
if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
|
||||
((NumGradients / 2) % 2 == 1 &&
|
||||
(i == DimIdx + (NumGradients / 2) - 1 ||
|
||||
((NumGradients / 2) % 2 == 1 &&
|
||||
(i == DimIdx + (NumGradients / 2) - 1 ||
|
||||
i == DimIdx + NumGradients - 1))) {
|
||||
AddrHi = DAG.getUNDEF(MVT::f16);
|
||||
} else {
|
||||
@ -6077,8 +6076,8 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
|
||||
|
||||
// FIXME: Constant loads should all be marked invariant.
|
||||
unsigned AS = Ld->getAddressSpace();
|
||||
if (AS != AMDGPUASI.CONSTANT_ADDRESS &&
|
||||
AS != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
|
||||
if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
|
||||
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
|
||||
return SDValue();
|
||||
|
||||
@ -6189,14 +6188,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
// If there is a possibility that flat instructions access scratch memory
|
||||
// then we need to use the same legalization rules we use for private.
|
||||
if (AS == AMDGPUASI.FLAT_ADDRESS)
|
||||
if (AS == AMDGPUAS::FLAT_ADDRESS)
|
||||
AS = MFI->hasFlatScratchInit() ?
|
||||
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
|
||||
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
|
||||
|
||||
unsigned NumElements = MemVT.getVectorNumElements();
|
||||
|
||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
|
||||
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
||||
if (!Op->isDivergent() && Alignment >= 4)
|
||||
return SDValue();
|
||||
// Non-uniform loads will be selected to MUBUF instructions, so they
|
||||
@ -6205,9 +6204,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
//
|
||||
}
|
||||
|
||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
|
||||
AS == AMDGPUASI.GLOBAL_ADDRESS) {
|
||||
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
|
||||
AS == AMDGPUAS::GLOBAL_ADDRESS) {
|
||||
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
|
||||
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
|
||||
Alignment >= 4)
|
||||
@ -6217,16 +6216,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
// loads.
|
||||
//
|
||||
}
|
||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
|
||||
AS == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
|
||||
AS == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUAS::FLAT_ADDRESS) {
|
||||
if (NumElements > 4)
|
||||
return SplitVectorLoad(Op, DAG);
|
||||
// v4 loads are supported for private and global memory.
|
||||
return SDValue();
|
||||
}
|
||||
if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
// Depending on the setting of the private_element_size field in the
|
||||
// resource descriptor, we can only make private accesses up to a certain
|
||||
// size.
|
||||
@ -6245,7 +6244,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
default:
|
||||
llvm_unreachable("unsupported private_element_size");
|
||||
}
|
||||
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
// Use ds_read_b128 if possible.
|
||||
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
|
||||
MemVT.getStoreSize() == 16)
|
||||
@ -6622,17 +6621,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
// If there is a possibility that flat instructions access scratch memory
|
||||
// then we need to use the same legalization rules we use for private.
|
||||
if (AS == AMDGPUASI.FLAT_ADDRESS)
|
||||
if (AS == AMDGPUAS::FLAT_ADDRESS)
|
||||
AS = MFI->hasFlatScratchInit() ?
|
||||
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
|
||||
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
|
||||
|
||||
unsigned NumElements = VT.getVectorNumElements();
|
||||
if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUASI.FLAT_ADDRESS) {
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
AS == AMDGPUAS::FLAT_ADDRESS) {
|
||||
if (NumElements > 4)
|
||||
return SplitVectorStore(Op, DAG);
|
||||
return SDValue();
|
||||
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
switch (Subtarget->getMaxPrivateElementSize()) {
|
||||
case 4:
|
||||
return scalarizeVectorStore(Store, DAG);
|
||||
@ -6647,7 +6646,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
default:
|
||||
llvm_unreachable("unsupported private_element_size");
|
||||
}
|
||||
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
|
||||
} else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
// Use ds_write_b128 if possible.
|
||||
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
|
||||
VT.getStoreSize() == 16)
|
||||
@ -6687,7 +6686,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
|
||||
unsigned AS = AtomicNode->getAddressSpace();
|
||||
|
||||
// No custom lowering required for local address space
|
||||
if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
|
||||
if (!isFlatGlobalAddrSpace(AS))
|
||||
return Op;
|
||||
|
||||
// Non-local address space requires custom lowering for atomic compare
|
||||
@ -9205,8 +9204,8 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
|
||||
break;
|
||||
case ISD::LOAD: {
|
||||
const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
|
||||
if (L->getMemOperand()->getAddrSpace() ==
|
||||
Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
|
||||
// FIXME: Also needs to handle flat.
|
||||
if (L->getMemOperand()->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return true;
|
||||
} break;
|
||||
case ISD::CALLSEQ_END:
|
||||
|
@ -370,7 +370,6 @@ private:
|
||||
const MachineRegisterInfo *MRI = nullptr;
|
||||
const MachineLoopInfo *MLI = nullptr;
|
||||
AMDGPU::IsaInfo::IsaVersion IV;
|
||||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
DenseSet<MachineBasicBlock *> BlockVisitedSet;
|
||||
DenseSet<MachineInstr *> TrackedWaitcntSet;
|
||||
@ -1051,7 +1050,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||
// instruction.
|
||||
for (const MachineMemOperand *Memop : MI.memoperands()) {
|
||||
unsigned AS = Memop->getAddrSpace();
|
||||
if (AS != AMDGPUASI.LOCAL_ADDRESS)
|
||||
if (AS != AMDGPUAS::LOCAL_ADDRESS)
|
||||
continue;
|
||||
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
|
||||
// VM_CNT is only relevant to vgpr or LDS.
|
||||
@ -1086,7 +1085,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||
// FIXME: Should not be relying on memoperands.
|
||||
for (const MachineMemOperand *Memop : MI.memoperands()) {
|
||||
unsigned AS = Memop->getAddrSpace();
|
||||
if (AS != AMDGPUASI.LOCAL_ADDRESS)
|
||||
if (AS != AMDGPUAS::LOCAL_ADDRESS)
|
||||
continue;
|
||||
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
|
||||
EmitWaitcnt |= ScoreBrackets->updateByWait(
|
||||
@ -1305,7 +1304,7 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
|
||||
|
||||
for (const MachineMemOperand *Memop : MI.memoperands()) {
|
||||
unsigned AS = Memop->getAddrSpace();
|
||||
if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1844,7 +1843,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
||||
MLI = &getAnalysis<MachineLoopInfo>();
|
||||
IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
AMDGPUASI = ST->getAMDGPUAS();
|
||||
|
||||
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
|
||||
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
|
||||
|
@ -1937,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
|
||||
switch(Kind) {
|
||||
case PseudoSourceValue::Stack:
|
||||
case PseudoSourceValue::FixedStack:
|
||||
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
|
||||
return AMDGPUAS::PRIVATE_ADDRESS;
|
||||
case PseudoSourceValue::ConstantPool:
|
||||
case PseudoSourceValue::GOT:
|
||||
case PseudoSourceValue::JumpTable:
|
||||
case PseudoSourceValue::GlobalValueCallEntry:
|
||||
case PseudoSourceValue::ExternalSymbolCallEntry:
|
||||
case PseudoSourceValue::TargetCustom:
|
||||
return ST.getAMDGPUAS().CONSTANT_ADDRESS;
|
||||
return AMDGPUAS::CONSTANT_ADDRESS;
|
||||
}
|
||||
return ST.getAMDGPUAS().FLAT_ADDRESS;
|
||||
return AMDGPUAS::FLAT_ADDRESS;
|
||||
}
|
||||
|
||||
static void removeModOperands(MachineInstr &MI) {
|
||||
@ -4605,7 +4605,7 @@ void SIInstrInfo::splitScalarBuffer(SetVectorType &Worklist,
|
||||
unsigned Count = 0;
|
||||
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
|
||||
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
|
||||
|
||||
|
||||
switch(Opcode) {
|
||||
default:
|
||||
return;
|
||||
@ -4619,7 +4619,7 @@ void SIInstrInfo::splitScalarBuffer(SetVectorType &Worklist,
|
||||
|
||||
// FIXME: Should also attempt to build VAddr and Offset like the non-split
|
||||
// case (see call site for this function)
|
||||
|
||||
|
||||
// Create a vector of result registers
|
||||
SmallVector<unsigned, 8> ResultRegs;
|
||||
for (unsigned i = 0; i < Count ; ++i) {
|
||||
@ -4913,7 +4913,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
|
||||
return AMDGPU::NoRegister;
|
||||
|
||||
assert(!MI.memoperands_empty() &&
|
||||
(*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
|
||||
(*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
||||
FrameIndex = Addr->getIndex();
|
||||
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
|
||||
@ -5030,7 +5030,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
|
||||
return true;
|
||||
|
||||
for (const MachineMemOperand *MMO : MI.memoperands()) {
|
||||
if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
|
||||
if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -202,8 +202,6 @@ public:
|
||||
|
||||
class SIMemOpAccess final {
|
||||
private:
|
||||
|
||||
AMDGPUAS SIAddrSpaceInfo;
|
||||
AMDGPUMachineModuleInfo *MMI = nullptr;
|
||||
|
||||
/// Reports unsupported message \p Msg for \p MI to LLVM context.
|
||||
@ -453,22 +451,21 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
|
||||
}
|
||||
|
||||
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
|
||||
if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
|
||||
if (AS == AMDGPUAS::FLAT_ADDRESS)
|
||||
return SIAtomicAddrSpace::FLAT;
|
||||
if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS)
|
||||
return SIAtomicAddrSpace::GLOBAL;
|
||||
if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS)
|
||||
return SIAtomicAddrSpace::LDS;
|
||||
if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return SIAtomicAddrSpace::SCRATCH;
|
||||
if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
|
||||
if (AS == AMDGPUAS::REGION_ADDRESS)
|
||||
return SIAtomicAddrSpace::GDS;
|
||||
|
||||
return SIAtomicAddrSpace::OTHER;
|
||||
}
|
||||
|
||||
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
|
||||
SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
|
||||
MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
|
||||
}
|
||||
|
||||
|
@ -382,8 +382,8 @@ defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
|
||||
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
|
||||
auto Ld = cast<LoadSDNode>(N);
|
||||
return Ld->getAlignment() >= 4 &&
|
||||
((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
|
||||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
|
||||
((((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
|
||||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
|
||||
!Ld->isVolatile() && !N->isDivergent() &&
|
||||
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
|
||||
}]>;
|
||||
|
@ -978,29 +978,6 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace AMDGPU
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
|
||||
AMDGPUAS getAMDGPUAS(Triple T) {
|
||||
AMDGPUAS AS;
|
||||
AS.FLAT_ADDRESS = 0;
|
||||
AS.PRIVATE_ADDRESS = 5;
|
||||
AS.REGION_ADDRESS = 2;
|
||||
return AS;
|
||||
}
|
||||
|
||||
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
|
||||
return getAMDGPUAS(M.getTargetTriple());
|
||||
}
|
||||
|
||||
AMDGPUAS getAMDGPUAS(const Module &M) {
|
||||
return getAMDGPUAS(Triple(M.getTargetTriple()));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
struct SourceOfDivergence {
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llvm-dis | FileCheck %s
|
||||
; RUN: llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa-amdgiz
|
||||
; RUN: llc -mtriple amdgcn-amd-amdhsa < %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa
|
||||
; RUN: opt -data-layout=A5 -S < %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | opt -S
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare void @external_void_func_void() #0
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
|
||||
|
||||
;
|
||||
; kernel void combine_vloads(global char8 addrspace(5)* src, global char8 addrspace(5)* result) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
%struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] }
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
%struct.ShapeData = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32, i64, <4 x float>, i32, i8, i8, i16, i32, i32 }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR
|
||||
target datalayout = "A5"
|
||||
|
||||
; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
|
||||
target datalayout = "A5"
|
||||
|
||||
; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s 2>&1 | FileCheck %s
|
||||
target datalayout = "A5"
|
||||
|
||||
; CHECK: in function test_dynamic_stackalloc{{.*}}: unsupported dynamic alloca
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s
|
||||
; Just check the target feature and data layout is accepted without error.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
target triple = "amdgcn-amd-amdhsa-amdgiz"
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s
|
||||
; Just check the target feature and data layout is accepted without error.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
target triple = "amdgcn-amd-amdhsa-amdgizcl"
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s
|
||||
; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s
|
||||
target datalayout = "A5"
|
||||
; REQUIRES: asserts
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_v2f16:
|
||||
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2:
|
||||
; GCN: buffer_load_dwordx4
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_v2i16:
|
||||
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; How the replacement of i64 stores with v2i32 stores resulted in
|
||||
; breaking other users of the bitcast if they already existed
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
|
||||
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn---amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
|
||||
;
|
||||
; The original OpenCL kernel:
|
||||
; kernel void f(global int *a, int i, int j) {
|
||||
@ -6,7 +6,7 @@
|
||||
; x[i] = 7;
|
||||
; a[0] = x[j];
|
||||
; }
|
||||
; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o -
|
||||
; clang -cc1 -triple amdgcn--cl -emit-llvm -o -
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}i1_func_void:
|
||||
; GCN: buffer_load_ubyte v0, off
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
target datalayout = "A5"
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: not llvm-as -data-layout=A5 < %s 2>&1 | FileCheck -check-prefixes=COMMON,AS %s
|
||||
; RUN: not llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa-amdgiz 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
; RUN: not llc -mtriple amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
; RUN: not opt -data-layout=A5 -S < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
; RUN: llvm-as < %s | not opt -data-layout=A5 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_i1:
|
||||
; GCN: buffer_load_ubyte
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}global_load_i1:
|
||||
; GCN: buffer_load_ubyte
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}local_load_i1:
|
||||
; SICIVI: s_mov_b32 m0
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Effectively, check that the compile finishes; in the case
|
||||
; of an infinite loop, llc toggles between merging 2 ST4s
|
||||
|
@ -1,5 +1,5 @@
|
||||
;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
|
||||
;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
|
||||
;RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
|
||||
;RUN: llc < %s -march=r600 -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
|
||||
|
||||
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
target datalayout = "A5"
|
||||
|
||||
declare {}* @llvm.invariant.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=r600---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: NoAlias: i8 addrspace(5)* %p, i8 addrspace(7)* %p1
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
%struct.widget.0 = type { float, i32, i32 }
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
|
||||
|
||||
; This used to fail due to a v_add_i32 instruction with an illegal immediate
|
||||
; operand that was created during Local Stack Slot Allocation. Test case derived
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: i16 promotion pass ruins the scalar cases when legal.
|
||||
; FIXME: r600 fails verifier
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
# Check that assert is not triggered
|
||||
# GCN-LABEL: name: si-lower-control-flow{{$}}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s
|
||||
target datalayout = "A5"
|
||||
|
||||
; FIXME: Why is this commuted only sometimes?
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
|
||||
|
||||
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
|
||||
; GCN-POSTLINK: tail call fast float @_Z3sinf(
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}store_i1:
|
||||
; EG: MEM_RAT MSKOR
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}store_local_i1:
|
||||
; SICIVI: s_mov_b32 m0
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mtriple=r600-- -mcpu=redwood | FileCheck %s
|
||||
|
||||
; This tests for a bug in the SelectionDAG where custom lowered truncated
|
||||
; vector stores at the end of a basic block were not being added to the
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
|
||||
target datalayout = "A5"
|
||||
|
||||
; Should not crash when the processor is not recognized and the
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -tailcallopt < %s 2>&1 | FileCheck -check-prefix=GCN %s
|
||||
; RUN: not llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -tailcallopt < %s 2>&1 | FileCheck -check-prefix=R600 %s
|
||||
; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -tailcallopt < %s 2>&1 | FileCheck -check-prefix=GCN %s
|
||||
; RUN: not llc -march=r600 -mtriple=r600-- -mcpu=cypress -tailcallopt < %s 2>&1 | FileCheck -check-prefix=R600 %s
|
||||
|
||||
declare i32 @external_function(i32) nounwind
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
|
||||
target datalayout = "A5"
|
||||
|
||||
; OPT-LABEL: @vector_read(
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
|
||||
|
||||
; This ends up using all 256 registers and requires register
|
||||
; scavenging which will fail to find an unused register.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
|
||||
; This ends up using all 255 registers and requires register
|
||||
; scavenging which will fail to find an unused register.
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %s > %t.ll
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -filetype=obj -O0 -o %t.o %t.ll
|
||||
; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -disassemble -line-numbers %t.o | FileCheck --check-prefix=LINE %t.ll
|
||||
; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -disassemble -source %t.o | FileCheck --check-prefix=SOURCE %t.ll
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -O0 -o %t.o %t.ll
|
||||
; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa -mcpu=gfx802 -disassemble -line-numbers %t.o | FileCheck --check-prefix=LINE %t.ll
|
||||
; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa -mcpu=gfx802 -disassemble -source %t.o | FileCheck --check-prefix=SOURCE %t.ll
|
||||
|
||||
; Prologue.
|
||||
; LINE: source_lines_test:
|
||||
@ -38,7 +38,7 @@
|
||||
; ModuleID = 'source-lines.cl'
|
||||
source_filename = "source-lines.cl"
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
target triple = "amdgcn-amd-amdhsa-amdgiz"
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define amdgpu_kernel void @source_lines_test(i32 addrspace(1)* %Out) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
|
||||
|
Loading…
Reference in New Issue
Block a user