1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[AArch64] Inline callee if its target-features are a subset of the caller

Summary:
Similar to X86, it should be safe to inline callees if their target-features
are a subset of the caller's target-features. This change matches GCC's
inlining behavior with respect to attributes [1].

[1] https://gcc.gnu.org/onlinedocs/gcc/AArch64-Function-Attributes.html#AArch64-Function-Attributes

Reviewers: kristof.beyls, javed.absar, rengolin, t.p.northover

Reviewed By: t.p.northover

Subscribers: aemerson, eraman, llvm-commits

Differential Revision: https://reviews.llvm.org/D34698

llvm-svn: 306478
This commit is contained in:
Florian Hahn 2017-06-27 22:27:32 +00:00
parent ee11ba5b52
commit 5722baf65f
3 changed files with 57 additions and 0 deletions

View File

@ -20,6 +20,20 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
// Inline a callee if its target-features are a subset of the callers
// target-features.
return (CallerBits & CalleeBits) == CalleeBits;
}
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.

View File

@ -51,6 +51,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
/// Returns true when the target features of \p Callee are a subset of the
/// target features of \p Caller, which is the condition under which the
/// callee is treated as compatible for inlining into the caller.
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \name Scalar TTI Implementations
/// @{

View File

@ -0,0 +1,40 @@
; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -inline | FileCheck %s
; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
; Check that we only inline when we have compatible target attributes.
; @foo uses attribute group #0 (+crc,+neon) and calls @baz, which is only
; declared in this module, so the call to @baz is expected to remain.
define i32 @foo() #0 {
entry:
%call = call i32 (...) @baz()
ret i32 %call
; CHECK-LABEL: foo
; CHECK: call i32 (...) @baz()
}
; @baz has no body in this module; it serves as the call that stays visible
; after @foo has (or has not) been inlined into its callers.
declare i32 @baz(...) #0
; @bar uses attribute group #1 (+crc,+neon,+crypto). Its callee @foo uses #0
; (+crc,+neon), a subset of #1, so @foo is expected to be inlined and the
; call to @baz should appear directly in @bar.
define i32 @bar() #1 {
entry:
%call = call i32 @foo()
ret i32 %call
; CHECK-LABEL: bar
; CHECK: call i32 (...) @baz()
}
; @qux uses attribute group #0 (+crc,+neon). Its callee @bar uses #1, which
; additionally has +crypto, so the callee's features are not a subset of the
; caller's and the call to @bar is expected to stay.
define i32 @qux() #0 {
entry:
%call = call i32 @bar()
ret i32 %call
; CHECK-LABEL: qux
; CHECK: call i32 @bar()
}
; @strict_align uses attribute group #2 (+crc,+neon,+strict-align). Its callee
; @foo uses #0 (+crc,+neon), a subset of #2, so @foo is expected to be inlined
; and the call to @baz should appear directly in @strict_align.
define i32 @strict_align() #2 {
entry:
%call = call i32 @foo()
ret i32 %call
; CHECK-LABEL: strict_align
; CHECK: call i32 (...) @baz()
}
; Attribute groups used above. #0 is the baseline feature set; #1 and #2 each
; extend it with one extra feature (+crypto and +strict-align respectively).
attributes #0 = { "target-cpu"="generic" "target-features"="+crc,+neon" }
attributes #1 = { "target-cpu"="generic" "target-features"="+crc,+neon,+crypto" }
attributes #2 = { "target-cpu"="generic" "target-features"="+crc,+neon,+strict-align" }