mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AMDGPU: Implement per-function subtargets
llvm-svn: 273940
This commit is contained in:
parent
214b515f81
commit
3bfa69bfd5
@ -202,17 +202,7 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
AMDGPUSubtarget(TT, GPU, FS, TM),
|
||||
InstrInfo(*this),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||
TLInfo(TM, *this) {
|
||||
#ifndef LLVM_BUILD_GLOBAL_ISEL
|
||||
GISelAccessor *GISel = new GISelAccessor();
|
||||
#else
|
||||
AMDGPUGISelActualAccessor *GISel =
|
||||
new AMDGPUGISelActualAccessor();
|
||||
GISel->CallLoweringInfo.reset(
|
||||
new AMDGPUCallLowering(*getTargetLowering()));
|
||||
#endif
|
||||
setGISelAccessor(*GISel);
|
||||
}
|
||||
TLInfo(TM, *this) {}
|
||||
|
||||
unsigned R600Subtarget::getStackEntrySize() const {
|
||||
switch (getWavefrontSize()) {
|
||||
|
@ -145,6 +145,20 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
|
||||
|
||||
AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
|
||||
|
||||
/// Pick the GPU name to use when building a subtarget for \p F.
///
/// Reads the function's "target-cpu" attribute. If that lookup yields the
/// None attribute, the target machine's own CPU string is returned;
/// otherwise the attribute's string value is returned.
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  if (CPUAttr.hasAttribute(Attribute::None))
    return getTargetCPU();
  return CPUAttr.getValueAsString();
}
|
||||
|
||||
/// Pick the subtarget feature string to use for \p F.
///
/// Reads the function's "target-features" attribute. If that lookup yields
/// the None attribute, the target machine's feature string is returned;
/// otherwise the attribute's string value is returned.
StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FeatureAttr = F.getFnAttribute("target-features");
  if (FeatureAttr.hasAttribute(Attribute::None))
    return getTargetFeatureString();
  return FeatureAttr.getValueAsString();
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 Target Machine (R600 -> Cayman)
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -154,8 +168,27 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
|
||||
TargetOptions Options,
|
||||
Optional<Reloc::Model> RM,
|
||||
CodeModel::Model CM, CodeGenOpt::Level OL)
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, getTargetCPU(), FS, *this) {}
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
|
||||
|
||||
/// Return the R600 subtarget for \p F, creating and caching it on first use.
///
/// Subtargets live in SubtargetMap, keyed by the function's GPU name with its
/// feature string appended, so functions that share both strings share one
/// subtarget object.
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
    const Function &F) const {
  const StringRef GPU = getGPUName(F);
  const StringRef FS = getFeatureString(F);

  SmallString<128> Key(GPU);
  Key.append(FS);

  auto &Cached = SubtargetMap[Key];
  if (Cached)
    return Cached.get();

  // The target options must be reset before a subtarget is constructed:
  // construction depends on the TM and on the per-function code generation
  // flags that reside in TargetOptions.
  resetTargetOptions(F);
  Cached = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  return Cached.get();
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GCN Target Machine (SI+)
|
||||
@ -166,8 +199,34 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
|
||||
TargetOptions Options,
|
||||
Optional<Reloc::Model> RM,
|
||||
CodeModel::Model CM, CodeGenOpt::Level OL)
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, getTargetCPU(), FS, *this) {}
|
||||
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
|
||||
|
||||
/// Return the SI subtarget for \p F, creating and caching it on first use.
///
/// Subtargets live in SubtargetMap, keyed by the function's GPU name with its
/// feature string appended. A freshly created subtarget is also handed a
/// GlobalISel accessor before it is returned.
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  const StringRef GPU = getGPUName(F);
  const StringRef FS = getFeatureString(F);

  SmallString<128> Key(GPU);
  Key.append(FS);

  auto &Cached = SubtargetMap[Key];
  if (Cached)
    return Cached.get();

  // The target options must be reset before a subtarget is constructed:
  // construction depends on the TM and on the per-function code generation
  // flags that reside in TargetOptions.
  resetTargetOptions(F);
  Cached = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

  // NOTE(review): the accessor is allocated with a raw `new` and passed by
  // reference; presumably setGISelAccessor() takes ownership -- confirm.
#ifndef LLVM_BUILD_GLOBAL_ISEL
  GISelAccessor *Accessor = new GISelAccessor();
#else
  SIGISelActualAccessor *Accessor = new SIGISelActualAccessor();
#endif
  Cached->setGISelAccessor(*Accessor);

  return Cached.get();
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU Pass Setup
|
||||
@ -244,8 +303,7 @@ public:
|
||||
|
||||
/// Build the TargetIRAnalysis for this target machine.
///
/// The callback constructs the AMDGPUTTIImpl from the function itself rather
/// than from the module's DataLayout alone, so the TTI is tied to the
/// subtarget selected for that function. The previous DataLayout-only return
/// is dropped: it made the per-function return that followed it unreachable.
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}
|
||||
|
||||
|
@ -29,6 +29,9 @@ protected:
|
||||
std::unique_ptr<TargetLoweringObjectFile> TLOF;
|
||||
AMDGPUIntrinsicInfo IntrinsicInfo;
|
||||
|
||||
StringRef getGPUName(const Function &F) const;
|
||||
StringRef getFeatureString(const Function &F) const;
|
||||
|
||||
public:
|
||||
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
||||
StringRef FS, TargetOptions Options,
|
||||
@ -55,7 +58,7 @@ public:
|
||||
|
||||
class R600TargetMachine final : public AMDGPUTargetMachine {
|
||||
private:
|
||||
R600Subtarget Subtarget;
|
||||
mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
|
||||
|
||||
public:
|
||||
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
||||
@ -65,13 +68,7 @@ public:
|
||||
|
||||
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
|
||||
|
||||
const R600Subtarget *getSubtargetImpl() const {
|
||||
return &Subtarget;
|
||||
}
|
||||
|
||||
const R600Subtarget *getSubtargetImpl(const Function &) const override {
|
||||
return &Subtarget;
|
||||
}
|
||||
const R600Subtarget *getSubtargetImpl(const Function &) const override;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -80,7 +77,7 @@ public:
|
||||
|
||||
class GCNTargetMachine final : public AMDGPUTargetMachine {
|
||||
private:
|
||||
SISubtarget Subtarget;
|
||||
mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
|
||||
|
||||
public:
|
||||
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
||||
@ -90,21 +87,9 @@ public:
|
||||
|
||||
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
|
||||
|
||||
const SISubtarget *getSubtargetImpl() const {
|
||||
return &Subtarget;
|
||||
}
|
||||
|
||||
const SISubtarget *getSubtargetImpl(const Function &) const override {
|
||||
return &Subtarget;
|
||||
}
|
||||
const SISubtarget *getSubtargetImpl(const Function &) const override;
|
||||
};
|
||||
|
||||
inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
|
||||
if (getTargetTriple().getArch() == Triple::amdgcn)
|
||||
return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
|
||||
return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
|
||||
}
|
||||
|
||||
inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
|
||||
const Function &F) const {
|
||||
if (getTargetTriple().getArch() == Triple::amdgcn)
|
||||
|
@ -59,9 +59,10 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
||||
}
|
||||
|
||||
public:
|
||||
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL)
|
||||
: BaseT(TM, DL), ST(TM->getSubtargetImpl()),
|
||||
TLI(ST->getTargetLowering()) {}
|
||||
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()),
|
||||
ST(TM->getSubtargetImpl(F)),
|
||||
TLI(ST->getTargetLowering()) {}
|
||||
|
||||
// Provide value semantics. MSVC requires that we spell all of these out.
|
||||
AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
|
||||
|
112
test/CodeGen/AMDGPU/target-cpu.ll
Normal file
112
test/CodeGen/AMDGPU/target-cpu.ll
Normal file
@ -0,0 +1,112 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; CI+ intrinsic
|
||||
declare void @llvm.amdgcn.s.dcache.inv.vol() #0
|
||||
|
||||
; VI+ intrinsic
|
||||
declare void @llvm.amdgcn.s.dcache.wb() #0
|
||||
|
||||
; CHECK-LABEL: {{^}}target_none:
|
||||
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; Baseline case: attribute group #0 is only `nounwind`, so this function
; carries no "target-cpu" attribute. It loads a global pointer from byte
; offset 1024 of the kernarg segment and stores 0 at the index given by the
; workitem id.
define void @target_none() #0 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
store i32 0, i32 addrspace(1)* %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}target_tahiti:
|
||||
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; Same body as @target_none, but compiled with an explicit
; "target-cpu"="tahiti" via attribute group #2. The function was previously
; tagged #1 (`nounwind readnone`), which left #2 unused, never requested the
; tahiti target, and marked a function that stores to memory as readnone.
define void @target_tahiti() #2 {
  %kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
  %kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
  store i32 0, i32 addrspace(1)* %gep
  ret void
}
|
||||
|
||||
; CHECK-LABEL: {{^}}target_bonaire:
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; CHECK: s_dcache_inv_vol
|
||||
; Attribute group #3 sets "target-cpu"="bonaire". On top of the common
; load/store sequence, this function calls @llvm.amdgcn.s.dcache.inv.vol,
; which the declaration above labels a CI+ intrinsic.
define void @target_bonaire() #3 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
store i32 0, i32 addrspace(1)* %gep
|
||||
call void @llvm.amdgcn.s.dcache.inv.vol()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}target_fiji:
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
|
||||
; CHECK: flat_store_dword
|
||||
; CHECK: s_dcache_wb{{$}}
|
||||
; Attribute group #4 sets "target-cpu"="fiji". On top of the common
; load/store sequence, this function calls @llvm.amdgcn.s.dcache.wb, which
; the declaration above labels a VI+ intrinsic.
define void @target_fiji() #4 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
store i32 0, i32 addrspace(1)* %gep
|
||||
call void @llvm.amdgcn.s.dcache.wb()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}promote_alloca_enabled:
|
||||
; CHECK: ds_read_b32
|
||||
; CHECK: ; LDSByteSize: 5120
|
||||
; Attribute group #5 sets "target-features"="+promote-alloca". The function
; reads one element of a 5 x i32 stack array at an index loaded from %in and
; writes it to %out.
define void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
|
||||
%load = load i32, i32* %arrayidx1
|
||||
store i32 %load, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}promote_alloca_disabled:
|
||||
; CHECK: SCRATCH_RSRC_DWORD0
|
||||
; CHECK: SCRATCH_RSRC_DWORD1
|
||||
; CHECK: ScratchSize: 24
|
||||
; Attribute group #6 sets "target-features"="-promote-alloca". The body is
; the same 5 x i32 stack-array read as @promote_alloca_enabled; only the
; feature string differs.
define void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp
|
||||
%load = load i32, i32* %arrayidx1
|
||||
store i32 %load, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind "target-cpu"="tahiti" }
|
||||
attributes #3 = { nounwind "target-cpu"="bonaire" }
|
||||
attributes #4 = { nounwind "target-cpu"="fiji" }
|
||||
attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" }
|
||||
attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" }
|
Loading…
Reference in New Issue
Block a user