mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 19:42:54 +02:00
[ARM] Inline callee if its target-features are a subset of the caller
Summary: Similar to X86, it should be safe to inline callees if their target-features are a subset of the caller. As some subtarget features provide different instructions depending on whether they are set or unset (e.g. ThumbMode and ModeSoftFloat), we use a whitelist of target-features describing hardware capabilities only. Reviewers: kristof.beyls, rengolin, t.p.northover, SjoerdMeijer, peter.smith, silviu.baranga, efriedma Reviewed By: SjoerdMeijer, efriedma Subscribers: dschuff, efriedma, aemerson, sdardis, javed.absar, arichardson, eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D34697 llvm-svn: 307889
This commit is contained in:
parent
dcba8216a6
commit
17accd7982
@ -15,6 +15,24 @@ using namespace llvm;
|
|||||||
|
|
||||||
#define DEBUG_TYPE "armtti"
|
#define DEBUG_TYPE "armtti"
|
||||||
|
|
||||||
|
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
|
||||||
|
const Function *Callee) const {
|
||||||
|
const TargetMachine &TM = getTLI()->getTargetMachine();
|
||||||
|
const FeatureBitset &CallerBits =
|
||||||
|
TM.getSubtargetImpl(*Caller)->getFeatureBits();
|
||||||
|
const FeatureBitset &CalleeBits =
|
||||||
|
TM.getSubtargetImpl(*Callee)->getFeatureBits();
|
||||||
|
|
||||||
|
// To inline a callee, all features not in the whitelist must match exactly.
|
||||||
|
bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
|
||||||
|
(CalleeBits & ~InlineFeatureWhitelist);
|
||||||
|
// For features in the whitelist, the callee's features must be a subset of
|
||||||
|
// the callers'.
|
||||||
|
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
|
||||||
|
(CalleeBits & InlineFeatureWhitelist);
|
||||||
|
return MatchExact && MatchSubset;
|
||||||
|
}
|
||||||
|
|
||||||
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
|
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
|
||||||
assert(Ty->isIntegerTy());
|
assert(Ty->isIntegerTy());
|
||||||
|
|
||||||
|
@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
|
|||||||
const ARMSubtarget *ST;
|
const ARMSubtarget *ST;
|
||||||
const ARMTargetLowering *TLI;
|
const ARMTargetLowering *TLI;
|
||||||
|
|
||||||
|
// Currently the following features are excluded from InlineFeatureWhitelist.
|
||||||
|
// ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
|
||||||
|
// Depending on whether they are set or unset, different
|
||||||
|
// instructions/registers are available. For example, inlining a callee with
|
||||||
|
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
|
||||||
|
// fail if the callee uses ARM only instructions, e.g. in inline asm.
|
||||||
|
const FeatureBitset InlineFeatureWhitelist = {
|
||||||
|
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
|
||||||
|
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
|
||||||
|
ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
|
||||||
|
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
|
||||||
|
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
|
||||||
|
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
|
||||||
|
ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
|
||||||
|
ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
|
||||||
|
ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
|
||||||
|
ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
|
||||||
|
ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
|
||||||
|
ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
|
||||||
|
ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
|
||||||
|
ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
|
||||||
|
ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
|
||||||
|
ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
|
||||||
|
ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
|
||||||
|
ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
|
||||||
|
ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
|
||||||
|
ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
|
||||||
|
ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
|
||||||
|
ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
|
||||||
|
ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
|
||||||
|
ARM::FeatureNoNegativeImmediates
|
||||||
|
};
|
||||||
|
|
||||||
const ARMSubtarget *getST() const { return ST; }
|
const ARMSubtarget *getST() const { return ST; }
|
||||||
const ARMTargetLowering *getTLI() const { return TLI; }
|
const ARMTargetLowering *getTLI() const { return TLI; }
|
||||||
|
|
||||||
@ -41,6 +74,9 @@ public:
|
|||||||
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
||||||
TLI(ST->getTargetLowering()) {}
|
TLI(ST->getTargetLowering()) {}
|
||||||
|
|
||||||
|
/// Returns true if \p Callee may be inlined into \p Caller given the
/// subtarget features of both functions: features outside
/// InlineFeatureWhitelist must match exactly, and the callee's whitelisted
/// features must be a subset of the caller's.
bool areInlineCompatible(const Function *Caller,
|
||||||
|
const Function *Callee) const;
|
||||||
|
|
||||||
bool enableInterleavedAccessVectorization() { return true; }
|
bool enableInterleavedAccessVectorization() { return true; }
|
||||||
|
|
||||||
/// Floating-point computation using ARMv8 AArch32 Advanced
|
/// Floating-point computation using ARMv8 AArch32 Advanced
|
||||||
|
60
test/Transforms/Inline/ARM/inline-target-attr.ll
Normal file
60
test/Transforms/Inline/ARM/inline-target-attr.ll
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s
|
||||||
|
; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
|
||||||
|
; Check that we only inline when we have compatible target attributes.
|
||||||
|
; ARM has implemented a target attribute that will verify that the attribute
|
||||||
|
; sets are compatible.
|
||||||
|
|
||||||
|
; foo calls baz, which is only declared in this file, so the call to baz is
; expected to still be present in foo after the inliner has run.
define i32 @foo() #0 {
entry:
  %res = call i32 (...) @baz()
  ret i32 %res
; CHECK-LABEL: foo
; CHECK: call i32 (...) @baz()
}
|
||||||
|
; baz is only declared here (no body in this file); calls to it are what the
; CHECK lines of foo, bar and strict_align look for.
declare i32 @baz(...) #0
|
||||||
|
|
||||||
|
; bar (#1) has every target feature of foo (#0) plus +fp16, so foo is expected
; to be inlined here, which leaves foo's call to baz visible inside bar.
define i32 @bar() #1 {
entry:
  %res = call i32 @foo()
  ret i32 %res
; CHECK-LABEL: bar
; CHECK: call i32 (...) @baz()
}
|
||||||
|
|
||||||
|
; qux (#0) lacks the +fp16 that bar (#1) enables, so the callee's features are
; not a subset of the caller's and the call to bar is expected to remain.
define i32 @qux() #0 {
entry:
  %res = call i32 @bar()
  ret i32 %res
; CHECK-LABEL: qux
; CHECK: call i32 @bar()
}
|
||||||
|
|
||||||
|
; thumb_fn (#2) enables +thumb-mode while foo (#0) does not. The two functions
; differ in that feature, so the call to foo is expected to remain.
define i32 @thumb_fn() #2 {
entry:
  %res = call i32 @foo()
  ret i32 %res
; CHECK-LABEL: thumb_fn
; CHECK: call i32 @foo
}
|
||||||
|
|
||||||
|
; strict_align (#3) adds +strict-align on top of the features of foo (#0).
; foo is expected to be inlined, which leaves its call to baz visible here.
define i32 @strict_align() #3 {
entry:
  %res = call i32 @foo()
  ret i32 %res
; CHECK-LABEL: strict_align
; CHECK: call i32 (...) @baz()
}
|
||||||
|
|
||||||
|
; soft_float_fn (#4) enables +soft-float while foo (#0) does not. The two
; functions differ in that feature, so the call to foo is expected to remain.
define i32 @soft_float_fn() #4 {
entry:
  %call = call i32 @foo()
  ret i32 %call
; The label must name this function so that the CHECK below is anchored to
; soft_float_fn rather than to an earlier function.
; CHECK-LABEL: soft_float_fn
; CHECK: call i32 @foo
}
|
||||||
|
|
||||||
|
; Attribute groups used by the functions in this test. All of them use the
; "generic" target-cpu and enable +dsp,+neon; they differ in the extra features:
;   #0: none              #1: +fp16           #2: +fp16,+thumb-mode
;   #3: +strict-align     #4: +fp16,+soft-float
attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
|
||||||
|
attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" }
|
||||||
|
attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" }
|
||||||
|
attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" }
|
||||||
|
attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" }
|
2
test/Transforms/Inline/ARM/lit.local.cfg
Normal file
2
test/Transforms/Inline/ARM/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Mark the tests in this directory as unsupported when 'ARM' is not among the
# targets listed in config.root.targets.
if 'ARM' not in config.root.targets:
    config.unsupported = True
|
Loading…
Reference in New Issue
Block a user