1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

AMDGPU: Implement per-function subtargets

llvm-svn: 273940
This commit is contained in:
Matt Arsenault 2016-06-27 20:48:03 +00:00
parent 214b515f81
commit 3bfa69bfd5
5 changed files with 188 additions and 42 deletions

View File

@ -202,17 +202,7 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AMDGPUSubtarget(TT, GPU, FS, TM),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
TLInfo(TM, *this) {
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
AMDGPUGISelActualAccessor *GISel =
new AMDGPUGISelActualAccessor();
GISel->CallLoweringInfo.reset(
new AMDGPUCallLowering(*getTargetLowering()));
#endif
setGISelAccessor(*GISel);
}
TLInfo(TM, *this) {}
unsigned R600Subtarget::getStackEntrySize() const {
switch (getWavefrontSize()) {

View File

@ -145,6 +145,20 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
/// Picks the GPU name to use for \p F. The function's "target-cpu" attribute
/// is queried first; if that attribute is of kind Attribute::None the target
/// machine's own CPU (getTargetCPU()) is returned, otherwise the attribute's
/// string value is returned.
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  if (CPUAttr.hasAttribute(Attribute::None))
    return getTargetCPU();
  return CPUAttr.getValueAsString();
}
/// Picks the subtarget feature string to use for \p F. The function's
/// "target-features" attribute is queried first; if that attribute is of kind
/// Attribute::None the target machine's feature string
/// (getTargetFeatureString()) is returned, otherwise the attribute's string
/// value is returned.
StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FeatureAttr = F.getFnAttribute("target-features");
  if (FeatureAttr.hasAttribute(Attribute::None))
    return getTargetFeatureString();
  return FeatureAttr.getValueAsString();
}
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//
@ -154,8 +168,27 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, getTargetCPU(), FS, *this) {}
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
/// Returns the R600Subtarget for \p F, building and caching it on first use.
///
/// Subtargets live in SubtargetMap, keyed by the function's GPU name with its
/// feature string appended, so every function that resolves to the same
/// GPU/feature pair gets the same subtarget instance.
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
    const Function &F) const {
  StringRef CPUName = getGPUName(F);
  StringRef Features = getFeatureString(F);

  SmallString<128> CacheKey(CPUName);
  CacheKey.append(Features);

  auto &Slot = SubtargetMap[CacheKey];
  if (Slot)
    return Slot.get();

  // The target options must be refreshed from the function before the
  // subtarget is constructed: construction depends on the TM and on the
  // per-function code generation flags stored in TargetOptions.
  resetTargetOptions(F);
  Slot = llvm::make_unique<R600Subtarget>(TargetTriple, CPUName, Features,
                                          *this);
  return Slot.get();
}
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
@ -166,8 +199,34 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, getTargetCPU(), FS, *this) {}
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
// Returns the SISubtarget to use for F, creating and caching it on first use.
// Subtargets are stored in SubtargetMap under a key formed by appending the
// function's feature string to its GPU name, so functions that resolve to the
// same GPU/feature pair share one subtarget instance.
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
StringRef GPU = getGPUName(F);
StringRef FS = getFeatureString(F);
SmallString<128> SubtargetKey(GPU);
SubtargetKey.append(FS);
// operator[] yields a reference to the map slot; it is empty (null) the first
// time this key is seen.
auto &I = SubtargetMap[SubtargetKey];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
// Attach a GlobalISel accessor to the newly created subtarget. Which accessor
// type is allocated depends on whether LLVM_BUILD_GLOBAL_ISEL is defined.
// NOTE(review): presumably setGISelAccessor takes ownership of the
// heap-allocated accessor -- confirm against its definition.
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
#endif
I->setGISelAccessor(*GISel);
}
return I.get();
}
//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
@ -244,8 +303,7 @@ public:
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(
AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
return TargetTransformInfo(AMDGPUTTIImpl(this, F));
});
}

View File

@ -29,6 +29,9 @@ protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
public:
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
@ -55,7 +58,7 @@ public:
class R600TargetMachine final : public AMDGPUTargetMachine {
private:
R600Subtarget Subtarget;
mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
public:
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@ -65,13 +68,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
const R600Subtarget *getSubtargetImpl() const {
return &Subtarget;
}
const R600Subtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const R600Subtarget *getSubtargetImpl(const Function &) const override;
};
//===----------------------------------------------------------------------===//
@ -80,7 +77,7 @@ public:
class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
SISubtarget Subtarget;
mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
public:
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@ -90,21 +87,9 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
const SISubtarget *getSubtargetImpl() const {
return &Subtarget;
}
const SISubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const SISubtarget *getSubtargetImpl(const Function &) const override;
};
inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
if (getTargetTriple().getArch() == Triple::amdgcn)
return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
}
inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
const Function &F) const {
if (getTargetTriple().getArch() == Triple::amdgcn)

View File

@ -59,9 +59,10 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
}
public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL)
: BaseT(TM, DL), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)

View File

@ -0,0 +1,112 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
; CI+ intrinsic
declare void @llvm.amdgcn.s.dcache.inv.vol() #0
; VI+ intrinsic
declare void @llvm.amdgcn.s.dcache.wb() #0
; CHECK-LABEL: {{^}}target_none:
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; Baseline case: attribute group #0 is only { nounwind }, so no "target-cpu"
; or "target-features" is set on this function. It loads a global pointer from
; byte offset 1024 of the kernarg segment and stores 0 to the element indexed
; by the workitem id.
define void @target_none() #0 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
store i32 0, i32 addrspace(1)* %gep
ret void
}
; CHECK-LABEL: {{^}}target_tahiti:
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; Explicit "target-cpu"="tahiti" case. This must reference attribute group #2
; (the only group that sets target-cpu to tahiti); group #1 is
; { nounwind readnone }, which is meant for the intrinsic declarations and
; would both leave the target-cpu unset and mark a storing function readnone.
; The body matches @target_none: load a global pointer from byte offset 1024 of
; the kernarg segment and store 0 to the element indexed by the workitem id.
define void @target_tahiti() #2 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
store i32 0, i32 addrspace(1)* %gep
ret void
}
; CHECK-LABEL: {{^}}target_bonaire:
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; CHECK: s_dcache_inv_vol
; Attribute group #3 sets "target-cpu"="bonaire". Same load/store body as the
; other target_* functions, followed by a call to the CI+ intrinsic
; llvm.amdgcn.s.dcache.inv.vol, which the expectations above look for as
; s_dcache_inv_vol.
define void @target_bonaire() #3 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
store i32 0, i32 addrspace(1)* %gep
call void @llvm.amdgcn.s.dcache.inv.vol()
ret void
}
; CHECK-LABEL: {{^}}target_fiji:
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
; CHECK: flat_store_dword
; CHECK: s_dcache_wb{{$}}
; Attribute group #4 sets "target-cpu"="fiji". Same load/store body as the
; other target_* functions, followed by a call to the VI+ intrinsic
; llvm.amdgcn.s.dcache.wb, which the expectations above look for as
; s_dcache_wb.
define void @target_fiji() #4 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
store i32 0, i32 addrspace(1)* %gep
call void @llvm.amdgcn.s.dcache.wb()
ret void
}
; CHECK-LABEL: {{^}}promote_alloca_enabled:
; CHECK: ds_read_b32
; CHECK: ; LDSByteSize: 5120
; Attribute group #5 sets "target-features"="+promote-alloca". The function
; reads one element of a 5 x i32 stack array at an index loaded from %in and
; writes it to %out; the expectations above look for a ds_read_b32 and an
; LDSByteSize of 5120 in the output.
define void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
entry:
%stack = alloca [5 x i32], align 4
%tmp = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
%load = load i32, i32* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}promote_alloca_disabled:
; CHECK: SCRATCH_RSRC_DWORD0
; CHECK: SCRATCH_RSRC_DWORD1
; CHECK: ScratchSize: 24
; Attribute group #6 sets "target-features"="-promote-alloca". The body is the
; same as @promote_alloca_enabled; the expectations above instead look for the
; SCRATCH_RSRC_DWORD0/1 symbols and a ScratchSize of 24 in the output.
define void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
entry:
%stack = alloca [5 x i32], align 4
%tmp = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
%load = load i32, i32* %arrayidx1
store i32 %load, i32 addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-cpu"="tahiti" }
attributes #3 = { nounwind "target-cpu"="bonaire" }
attributes #4 = { nounwind "target-cpu"="fiji" }
attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" }
attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" }