mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
AMDGPU: Use a custom areInlineCompatible
Fixes OpenCL library functions, which don't have an explicitly set target-cpu, not being inlined on AMDGPU. llvm-svn: 310269
This commit is contained in:
parent
3e1c1179c0
commit
8f9d2dbe22
@ -534,3 +534,16 @@ unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Inde
|
||||
|
||||
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const {
|
||||
const TargetMachine &TM = getTLI()->getTargetMachine();
|
||||
const FeatureBitset &CallerBits =
|
||||
TM.getSubtargetImpl(*Caller)->getFeatureBits();
|
||||
const FeatureBitset &CalleeBits =
|
||||
TM.getSubtargetImpl(*Callee)->getFeatureBits();
|
||||
|
||||
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
|
||||
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
|
||||
return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
|
||||
}
|
||||
|
@ -34,6 +34,32 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
||||
const AMDGPUTargetLowering *TLI;
|
||||
bool IsGraphicsShader;
|
||||
|
||||
|
||||
// Subtarget features that areInlineCompatible masks out of both the caller's
// and the callee's feature bits before comparing them.
const FeatureBitset InlineFeatureIgnoreList = {
  // Options that only control code generation and therefore don't matter.
  AMDGPU::FeatureEnableLoadStoreOpt,
  AMDGPU::FeatureEnableSIScheduler,
  AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
  AMDGPU::FeatureFlatForGlobal,
  AMDGPU::FeaturePromoteAlloca,
  AMDGPU::FeatureUnalignedBufferAccess,
  AMDGPU::FeatureUnalignedScratchAccess,

  AMDGPU::FeatureAutoWaitcntBeforeBarrier,
  AMDGPU::FeatureDebuggerEmitPrologue,
  AMDGPU::FeatureDebuggerInsertNops,
  AMDGPU::FeatureDebuggerReserveRegs,

  // Properties of the kernel/environment, which can't actually differ.
  AMDGPU::FeatureSGPRInitBug,
  AMDGPU::FeatureXNACK,
  AMDGPU::FeatureTrapHandler,

  // Performance-tuning features.
  AMDGPU::FeatureFastFMAF32,
  AMDGPU::HalfRate64Ops
};
|
||||
|
||||
// Accessor for the AMDGPU subtarget pointer held in the ST member.
const AMDGPUSubtarget *getST() const { return ST; }
|
||||
// Accessor for the AMDGPU target-lowering pointer held in the TLI member.
const AMDGPUTargetLowering *getTLI() const { return TLI; }
|
||||
|
||||
@ -121,6 +147,9 @@ public:
|
||||
|
||||
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
Type *SubTp);
|
||||
|
||||
bool areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
90
test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
Normal file
90
test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
Normal file
@ -0,0 +1,90 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @func_no_target_cpu(
|
||||
define i32 @func_no_target_cpu() #0 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @target_cpu_call_no_target_cpu(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @target_cpu_call_no_target_cpu() #1 {
|
||||
%call = call i32 @func_no_target_cpu()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
|
||||
%call = call i32 @func_no_target_cpu()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fp32_denormals(
|
||||
define i32 @fp32_denormals() #3 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
|
||||
; CHECK-NEXT: call i32 @fp32_denormals()
|
||||
define i32 @no_fp32_denormals_call_f32_denormals() #4 {
|
||||
%call = call i32 @fp32_denormals()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; Make sure gfx9 can call unspecified functions because of movrel
|
||||
; feature change.
|
||||
; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @gfx9_target_features_call_no_target_cpu() #5 {
|
||||
%call = call i32 @func_no_target_cpu()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
define i32 @func_no_halfrate64ops() #6 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @func_with_halfrate64ops() #7 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @call_func_without_halfrate64ops(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @call_func_without_halfrate64ops() #7 {
|
||||
%call = call i32 @func_no_halfrate64ops()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @call_func_with_halfrate64ops(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @call_func_with_halfrate64ops() #6 {
|
||||
%call = call i32 @func_with_halfrate64ops()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
define i32 @func_no_loadstoreopt() #8 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @func_with_loadstoreopt() #9 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @call_func_without_loadstoreopt(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @call_func_without_loadstoreopt() #9 {
|
||||
%call = call i32 @func_no_loadstoreopt()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "target-cpu"="fiji" }
|
||||
attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
|
||||
attributes #3 = { nounwind "target-features"="+fp32-denormals" }
|
||||
attributes #4 = { nounwind "target-features"="-fp32-denormals" }
|
||||
attributes #5 = { nounwind "target-cpu"="gfx900" }
|
||||
attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
|
||||
attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
|
||||
attributes #8 = { nounwind "target-features"="-load-store-opt" }
|
||||
attributes #9 = { nounwind "target-features"="+load-store-opt" }
|
2
test/Transforms/Inline/AMDGPU/lit.local.cfg
Normal file
2
test/Transforms/Inline/AMDGPU/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
# Mark the tests governed by this config as unsupported when the AMDGPU
# target is not among the configured targets. `config` is supplied by lit
# when it loads this file.
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True
|
Loading…
Reference in New Issue
Block a user