mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
90665f11d7
minimal and boring form than the old pass manager's version. This pass does the very minimal amount of work necessary to inline functions declared as always-inline. It doesn't support a wide array of things that the legacy pass manager did support, but is also ... about 20 lines of code. So it has that going for it. Notably, things this doesn't support: - Array alloca merging - To support the above, bottom-up inlining with careful history tracking and call graph updates - DCE of the functions that become dead after this inlining. - Inlining through call instructions with the always_inline attribute. Instead, it focuses on inlining functions with that attribute. The first I've omitted because I'm hoping to just turn it off for the primary pass manager. If that doesn't pan out, I can add it here but it will be reasonably expensive to do so. The second should really be handled by running global-dce after the inliner. I don't want to re-implement the non-trivial logic necessary to do comdat-correct DCE of functions. This means the -O0 pipeline will have to be at least 'always-inline,global-dce', but that seems reasonable to me. If others are seriously worried about this I'd like to hear about it and understand why. Again, this is all solvable by factoring that logic into a utility and calling it here, but I'd like to wait to do that until there is a clear reason why the existing pass-based factoring won't work. The final point is a serious one. I can fairly easily add support for this, but it seems both costly and a confusing construct for the use case of the always inliner running at -O0. This attribute can of course still impact the normal inliner easily (although I find that a questionable re-use of the same attribute). I've started a discussion to sort out what semantics we want here and based on that can figure out if it makes sense to have this complexity at O0 or not.
One other advantage of this design is that it should be quite a bit faster due to checking for whether the function is a viable candidate for inlining exactly once per function instead of doing it for each call site. Anyways, hopefully a reasonable starting point for this pass. Differential Revision: https://reviews.llvm.org/D23299 llvm-svn: 278896
153 lines
3.3 KiB
LLVM
; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
;
; Ensure the threshold has no impact on these decisions.
; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
;
; The new pass manager doesn't re-use any threshold based infrastructure for
; the always inliner, but test that we get the correct result.
; RUN: opt < %s -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
; Trivial callee marked alwaysinline; the always inliner must inline it
; regardless of the -inline-threshold value.
define i32 @inner1() alwaysinline {
ret i32 1
}
; After always-inlining @inner1, no call instruction should remain here.
define i32 @outer1() {
; CHECK-LABEL: @outer1(
; CHECK-NOT: call
; CHECK: ret

%r = call i32 @inner1()
ret i32 %r
}
; The always inliner can't DCE internal functions. PR2945
; CHECK-LABEL: @pr2945(
; Internal, never-called, and NOT alwaysinline: the pass must leave it alone
; (DCE of dead functions is deliberately out of scope for this pass).
define internal i32 @pr2945() nounwind {
ret i32 0
}
; Callee containing a dynamic (runtime-sized) alloca; used to check that the
; always inliner is still willing to inline it (see @outer2).
define internal void @inner2(i32 %N) alwaysinline {
%P = alloca i32, i32 %N
ret void
}
define void @outer2(i32 %N) {
; The always inliner (unlike the normal one) should be willing to inline
; a function with a dynamic alloca into one without a dynamic alloca.
; rdar://6655932
;
; CHECK-LABEL: @outer2(
; CHECK-NOT: call void @inner2
; CHECK-NOT: call void @inner2
; CHECK: ret void

call void @inner2( i32 %N )
ret void
}
; External returns_twice functions (setjmp-like) used by the @inner3/@inner4
; test cases below.
declare i32 @a() returns_twice
declare i32 @b() returns_twice
; Calls a returns_twice function but is not itself marked returns_twice;
; inlining into @outer3 is still expected (see that test's CHECK lines).
define i32 @inner3() alwaysinline {
entry:
%call = call i32 @a() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; NOTE(review): the CHECK-NOT below requires the inner returns_twice call to
; @a to disappear entirely after inlining @inner3 — confirm that matches the
; intended semantics for non-returns_twice alwaysinline callees.
define i32 @outer3() {
entry:
; CHECK-LABEL: @outer3(
; CHECK-NOT: call i32 @a
; CHECK: ret

%call = call i32 @inner3()
%add = add nsw i32 1, %call
ret i32 %add
}
; Same shape as @inner3, but this callee is itself marked returns_twice.
; Contrast with @outer3: @outer4 checks this one is NOT fully folded away.
define i32 @inner4() alwaysinline returns_twice {
entry:
%call = call i32 @b() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; The call to @b() must survive in the output: the returns_twice callee is
; handled differently from the plain alwaysinline case in @outer3.
define i32 @outer4() {
entry:
; CHECK-LABEL: @outer4(
; CHECK: call i32 @b()
; CHECK: ret

%call = call i32 @inner4() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; Contains an indirectbr through a caller-supplied block address, which makes
; the function un-inlinable even under alwaysinline (see @outer5).
define i32 @inner5(i8* %addr) alwaysinline {
entry:
indirectbr i8* %addr, [ label %one, label %two ]

one:
ret i32 42

two:
ret i32 44
}
; The blockaddress constants taken of @inner5's labels mean the call below
; must remain a call even though the callee is alwaysinline.
define i32 @outer5(i32 %x) {
; CHECK-LABEL: @outer5(
; CHECK: call i32 @inner5
; CHECK: ret

%cmp = icmp slt i32 %x, 42
%addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two)
%call = call i32 @inner5(i8* %addr)
ret i32 %call
}
; Directly recursive alwaysinline function; exercises the inliner's handling
; of self-recursion (it cannot inline forever — see @outer6's CHECK lines).
define void @inner6(i32 %x) alwaysinline {
entry:
%icmp = icmp slt i32 %x, 0
br i1 %icmp, label %return, label %bb

bb:
%sub = sub nsw i32 %x, 1
call void @inner6(i32 %sub)
ret void

return:
ret void
}
; NOTE(review): the CHECK expects the call to @inner6 to remain — i.e. the
; recursive alwaysinline callee is not inlined here. Confirm this is the
; intended guard against infinite inlining of self-recursive functions.
define void @outer6() {
; CHECK-LABEL: @outer6(
; CHECK: call void @inner6(i32 42)
; CHECK: ret

entry:
call void @inner6(i32 42)
ret void
}
; Deliberately NOT marked alwaysinline; the attribute lives on the call site
; in @outer7 instead.
define i32 @inner7() {
ret i32 1
}
; Call-site alwaysinline attribute: only the CHECK-CALL prefix (legacy
; -always-inline runs) verifies this, since the new pass manager's always
; inliner intentionally does not inline based on call-site attributes.
define i32 @outer7() {
; CHECK-CALL-LABEL: @outer7(
; CHECK-CALL-NOT: call
; CHECK-CALL: ret

%r = call i32 @inner7() alwaysinline
ret i32 %r
}
; Identity function whose parameter carries an align 128 attribute; checks
; that inlining with mismatched parameter attributes works (see @outer8).
define float* @inner8(float* nocapture align 128 %a) alwaysinline {
ret float* %a
}
; Caller passes a pointer without the align 128 guarantee the callee's
; parameter declares; inlining should still occur and the call disappear.
define float @outer8(float* nocapture %a) {
; CHECK-LABEL: @outer8(
; CHECK-NOT: call float* @inner8
; CHECK: ret

%inner_a = call float* @inner8(float* %a)
%f = load float, float* %inner_a, align 4
ret float %f
}