mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
74564c24f8
making it no longer even remotely simple. The pass will now be more of a "full loop unswitching" pass rather than anything substantively simpler than any other approach. I plan to rename it accordingly once the dust settles. The key ideas of the new loop unswitcher are carried over for non-trivial unswitching: 1) Fully unswitch a branch or switch instruction from inside of a loop to outside of it. 2) Update the CFG and IR. This avoids needing to "remember" the unswitched branches as well as avoiding excessive cloning and reliance on complex parts of simplify-cfg to clean up the CFG. 3) Update the analyses (where we can) rather than just blowing them away or relying on something else updating them. Sadly, #3 is somewhat compromised here as the dominator tree updates were too complex for me to want to reason about. I will need to make another attempt to do this now that we have a nice dynamic update API for dominators. However, we do adhere to #3 w.r.t. LoopInfo. This approach also adds an important principle specific to non-trivial unswitching: not *all* of the loop will be duplicated when unswitching. This fact allows us to compute the cost in terms of how much *duplicate* code is inserted rather than just on raw size. Unswitching conditions which essentially partition loops will work regardless of the total loop size. Some remaining issues that I will be addressing in subsequent commits: - Handling unstructured control flow. - Unswitching 'switch' cases instead of just branches. - Moving to the dynamic update API for dominators. Some high-level, interesting limitations that folks might want to push on as follow-ups but that I don't have any immediate plans around: - We could be much more clever about not cloning things that will be deleted. In fact, we should be able to delete *nothing* and do a minimal number of clones. - There are many more interesting selection criteria for which branch to unswitch that we might want to look at. 
One that I'm particularly interested in is a set of conditions which all exit the loop and which can be merged into a single unswitched test of them. Differential revision: https://reviews.llvm.org/D34200 llvm-svn: 318549
502 lines
12 KiB
LLVM
502 lines
12 KiB
LLVM
; Specifically exercise the cost modeling for non-trivial loop unswitching.
;
; Both invocations below pass -enable-nontrivial-unswitch and
; -unswitch-threshold=5 and are verified against the same set of FileCheck
; lines: one goes through the -passes= pipeline syntax (followed by
; verify<loops>), the other through the -simple-loop-unswitch flag.
;
; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s
; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s

; External functions with no body in this file; the tests call them to give
; individual blocks a controllable amount of code.
declare void @a()
declare void @b()
declare void @x()

; First establish enough code size in the duplicated 'loop_begin' block to
; suppress unswitching.
;
; 'loop_begin' holds four calls ahead of the branch on the loop-invariant
; %cond, and the checks below expect the function to come out unchanged.
define void @test_no_unswitch(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_no_unswitch(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %loop_begin
;
; We shouldn't have unswitched into any other block either.
; CHECK-NOT:     br i1 %cond

loop_begin:
  call void @x()
  call void @x()
  call void @x()
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b
; The branch on %cond must still be inside the loop.
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b

loop_a:
  call void @a()
  br label %loop_latch

loop_b:
  call void @b()
  br label %loop_latch

loop_latch:
  ; The backedge condition is reloaded from memory on every iteration.
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
}

; Now check that the smaller formulation of 'loop_begin' does in fact unswitch
; with our low threshold.
;
; Same CFG as @test_no_unswitch, but 'loop_begin' contains a single call. The
; checks expect the branch on %cond to be hoisted into 'entry' and two copies
; of the loop to exist afterwards: a '.us' copy that only runs 'loop_a' and the
; original blocks that only run 'loop_b'.
define void @test_unswitch(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_unswitch(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b

loop_a:
  call void @a()
  br label %loop_latch
; The 'loop_a' unswitched loop.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b:
  call void @b()
  br label %loop_latch
; The 'loop_b' unswitched loop.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_b
;
; CHECK:       loop_b:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    br label %loop_latch
;
; CHECK:       loop_latch:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    br label %loop_exit

loop_latch:
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; Both unswitched loops rejoin at the single original exit block.
; CHECK:       loop_exit:
; CHECK-NEXT:    ret void
}

; Check that even with large amounts of code on either side of the unswitched
; branch, if that code would be kept in only one of the unswitched clones it
; doesn't contribute to the cost.
;
; 'loop_a' and 'loop_b' each hold four calls, yet the checks expect the loop to
; be unswitched, with 'loop_a' appearing only in the '.us' copy and 'loop_b'
; only in the original copy.
define void @test_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_unswitch_non_dup_code(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b

loop_a:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %loop_latch
; The 'loop_a' unswitched loop.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %loop_latch
; The 'loop_b' unswitched loop.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_b
;
; CHECK:       loop_b:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    br label %loop_latch
;
; CHECK:       loop_latch:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    br label %loop_exit

loop_latch:
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; CHECK:       loop_exit:
; CHECK-NEXT:    ret void
}

; Much like with non-duplicated code directly in the successor, we also won't
; duplicate even interesting CFGs.
;
; Each side of the branch on %cond is itself a small diamond ('loop_a' ->
; 'loop_a_a'/'loop_a_b', 'loop_b' -> 'loop_b_a'/'loop_b_b'). The checks expect
; the 'loop_a' diamond only in the '.us' copy and the 'loop_b' diamond only in
; the original copy.
define void @test_unswitch_non_dup_code_in_cfg(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_unswitch_non_dup_code_in_cfg(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b

loop_a:
  %v1 = load i1, i1* %ptr
  br i1 %v1, label %loop_a_a, label %loop_a_b

loop_a_a:
  call void @a()
  br label %loop_latch

loop_a_b:
  call void @a()
  br label %loop_latch
; The 'loop_a' unswitched loop.
;
; Note that the checks below expect 'loop_a_b.us' to be printed before
; 'loop_a_a.us'.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_a_a.us, label %loop_a_b.us
;
; CHECK:       loop_a_b.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_a_a.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b:
  %v2 = load i1, i1* %ptr
  br i1 %v2, label %loop_b_a, label %loop_b_b

loop_b_a:
  call void @b()
  br label %loop_latch

loop_b_b:
  call void @b()
  br label %loop_latch
; The 'loop_b' unswitched loop.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_b
;
; CHECK:       loop_b:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_b_a, label %loop_b_b
;
; CHECK:       loop_b_a:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    br label %loop_latch
;
; CHECK:       loop_b_b:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    br label %loop_latch
;
; CHECK:       loop_latch:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    br label %loop_exit

loop_latch:
  %v3 = load i1, i1* %ptr
  br i1 %v3, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; CHECK:       loop_exit:
; CHECK-NEXT:    ret void
}

; Check that even if there is *some* non-duplicated code on one side of an
; unswitch, we still count any other code in the loop that will in fact have to
; be duplicated.
;
; The two diamonds under 'loop_a' and 'loop_b' match the ones in
; @test_unswitch_non_dup_code_in_cfg, but here 'loop_latch' carries two extra
; calls. 'loop_begin' and 'loop_latch' are reached on both sides of the branch
; on %cond, and the checks expect the loop to be left alone.
define void @test_no_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_no_unswitch_non_dup_code(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %loop_begin
;
; We shouldn't have unswitched into any other block either.
; CHECK-NOT:     br i1 %cond

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b
; The branch on %cond must still be inside the loop.
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b

loop_a:
  %v1 = load i1, i1* %ptr
  br i1 %v1, label %loop_a_a, label %loop_a_b

loop_a_a:
  call void @a()
  br label %loop_latch

loop_a_b:
  call void @a()
  br label %loop_latch

loop_b:
  %v2 = load i1, i1* %ptr
  br i1 %v2, label %loop_b_a, label %loop_b_b

loop_b_a:
  call void @b()
  br label %loop_latch

loop_b_b:
  call void @b()
  br label %loop_latch

loop_latch:
  ; Extra code shared by both sides of the %cond branch.
  call void @x()
  call void @x()
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
}

; Check that we still unswitch when the exit block contains lots of code, even
; though we do clone the exit block as part of unswitching. This should work
; because we should split the exit block before anything inside it.
;
; 'loop_exit' holds four calls. The checks expect each unswitched loop to leave
; through its own small '*.split*' block and the four calls to appear once, in
; the shared 'loop_exit'.
define void @test_unswitch_large_exit(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_unswitch_large_exit(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b

loop_a:
  call void @a()
  br label %loop_latch
; The 'loop_a' unswitched loop.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b:
  call void @b()
  br label %loop_latch
; The 'loop_b' unswitched loop.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_b
;
; CHECK:       loop_b:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    br label %loop_latch
;
; CHECK:       loop_latch:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    br label %loop_exit

loop_latch:
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  call void @x()
  call void @x()
  call void @x()
  call void @x()
  ret void
; CHECK:       loop_exit:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    ret void
}

; Check that we handle a dedicated exit edge unswitch which is still
; non-trivial and has lots of code in the exit.
;
; Here the false edge of the branch on %cond goes to 'loop_b_exit', which makes
; four calls and returns, so that edge leaves the loop. The checks expect a
; '.us' loop that always takes 'loop_a', and an original 'loop_begin' that
; branches unconditionally to 'loop_b_exit'.
define void @test_unswitch_dedicated_exiting(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test_unswitch_dedicated_exiting(
entry:
  br label %loop_begin
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
  call void @x()
  br i1 %cond, label %loop_a, label %loop_b_exit

loop_a:
  call void @a()
  br label %loop_latch
; The 'loop_a' unswitched loop.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    call void @a()
; CHECK-NEXT:    br label %loop_latch.us
;
; CHECK:       loop_latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b_exit:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  ret void
; The 'loop_b_exit' unswitched exit path.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    call void @x()
; CHECK-NEXT:    br label %loop_b_exit
;
; CHECK:       loop_b_exit:
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    call void @b()
; CHECK-NEXT:    ret void

loop_latch:
  %v = load i1, i1* %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; CHECK:       loop_exit:
; CHECK-NEXT:    ret void
}