mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
d523d136d2
In r286814, the algorithm for calculating inline costs changed. This caused more inlining to take place which is especially apparent in optsize and minsize modes. As the cost calculation removed a skewed behaviour (we were inconsistent about the cost of calls) it isn't possible to update the thresholds to get exactly the same behaviour as before. However, this threshold change accounts for the very common case where an inline candidate has no calls within it. In this case, r286814 would inline around 5-6 more (IR) instructions. The changes to -Oz have been heavily benchmarked. The "obvious" value for the inline threshold at -Oz is zero, but due to inaccuracies in the inline heuristics this can actually cause code size increases due to not inlining key thunk functions (that then disappear). Experimentally, 5 was the sweet spot for code size over the test-suite. For -Os, this change removes the outlier results shown up by green dragon (http://104.154.54.203/db_default/v4/nts/13248). Fixes D26848. llvm-svn: 288024
31 lines
680 B
LLVM
31 lines
680 B
LLVM
; RUN: opt -S -Oz %s | FileCheck %s
|
|
|
|
; Global that @inner reads through a volatile load.
@a = global i32 4
|
|
|
|
; Inlining candidate: performs one volatile load of @a and returns the loaded
; value. The chain %a3 -> %a4 -> %a5 -> %a6 -> %ca is consumed only by the
; @llvm.assume call.
define i32 @inner() {
  %a1 = load volatile i32, i32* @a

  ; Here are enough instructions to prevent inlining, but because they are used
  ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
  ; still happen).
  ; NOTE(review): %a2 has no users in this function, so unlike %a3..%ca it does
  ; not feed the assume -- confirm whether it was meant to be part of the chain.
  %a2 = mul i32 %a1, %a1
  %a3 = sub i32 %a1, 5
  %a4 = udiv i32 %a3, -13
  %a5 = mul i32 %a4, %a4
  %a6 = add i32 %a5, %a5
  %ca = icmp sgt i32 %a6, -7
  tail call void @llvm.assume(i1 %ca)

  ret i32 %a1
}
|
|
|
|
; @inner() should be inlined for -Oz.
; The pattern below names the callee's actual return type (i32); a pattern with
; any other type could never match the call in @outer, which would let this
; test pass even when no inlining takes place.
; CHECK-NOT: call i32 @inner
define i32 @outer() optsize {
  %r = call i32 @inner()
  ret i32 %r
}
|
|
|
|
; Declaration of the assume intrinsic that @inner calls with %ca.
declare void @llvm.assume(i1) nounwind
|
|
|