mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
3ba57cf8ef
This patch applies the idea from D93734 to LoopUnswitch. It adds support for unswitching on conditions that are only invariant along certain paths through a loop. In particular, it targets conditions in the loop header that depend on values loaded from memory. If either path from the true or false successor through the loop does not modify memory, perform partial loop unswitching. That is, duplicate the instructions feeding the condition in the pre-header. Then unswitch on the duplicated condition. The condition is now known in the unswitched version for the 'invariant' path through the original loop. One caveat of this approach is that one of the loops created can be partially unswitched again. To avoid this behavior, `llvm.loop.unswitch.partial.disable` metadata is added to the unswitched loops, to avoid subsequent partial unswitching. If that's the approach to go, I can move the code handling the metadata kind into separate functions. This increases the cases we unswitch quite a bit in SPEC2006/SPEC2000 & MultiSource. 
It also allows us to eliminate a dead loop in SPEC2017's omnetpp ``` Tests: 236 Same hash: 170 (filtered out) Remaining: 66 Metric: loop-unswitch.NumBranches Program base patch diff test-suite...000/255.vortex/255.vortex.test 2.00 23.00 1050.0% test-suite...T2006/401.bzip2/401.bzip2.test 7.00 55.00 685.7% test-suite :: External/Nurbs/nurbs.test 5.00 26.00 420.0% test-suite...s-C/unix-smail/unix-smail.test 1.00 3.00 200.0% test-suite.../Prolangs-C++/ocean/ocean.test 1.00 3.00 200.0% test-suite...tions/lambda-0.1.3/lambda.test 1.00 3.00 200.0% test-suite...yApps-C++/PENNANT/PENNANT.test 2.00 5.00 150.0% test-suite...marks/Ptrdist/yacr2/yacr2.test 1.00 2.00 100.0% test-suite...lications/viterbi/viterbi.test 1.00 2.00 100.0% test-suite...plications/d/make_dparser.test 12.00 24.00 100.0% test-suite...CFP2006/433.milc/433.milc.test 14.00 27.00 92.9% test-suite.../Applications/lemon/lemon.test 7.00 12.00 71.4% test-suite...ce/Applications/Burg/burg.test 6.00 10.00 66.7% test-suite...T2006/473.astar/473.astar.test 16.00 26.00 62.5% test-suite...marks/7zip/7zip-benchmark.test 78.00 121.00 55.1% ``` Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93764
49 lines
1.6 KiB
LLVM
49 lines
1.6 KiB
LLVM
; RUN: opt -loop-unswitch -loop-unswitch-memoryssa-threshold=0 -memssa-check-limit=1 -enable-new-pm=0 -S %s | FileCheck --check-prefix=THRESHOLD-0 %s
|
|
; RUN: opt -loop-unswitch -memssa-check-limit=1 -S -enable-new-pm=0 %s | FileCheck --check-prefix=THRESHOLD-DEFAULT %s
|
|
|
|
; Make sure -loop-unswitch-memoryssa-threshold works. The test uses
|
|
; -memssa-check-limit=1 to effectively disable any MemorySSA optimizations
|
|
; on construction, so the test can be kept simple.
|
|
|
|
; External helper: declared only (no body, no attributes). It is called from
; the %clobber block of the loop in the test function below.
declare void @clobber()
|
|
|
|
; Partial unswitching is possible, because the store in %noclobber does not
|
|
; alias the load of the condition.
|
|
define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) {
|
|
; Expected output for the first RUN line (memoryssa threshold set to 0):
; the entry block still branches straight to the loop header.
; THRESHOLD-0-LABEL: @partial_unswitch_true_successor
|
|
; THRESHOLD-0: entry:
|
|
; THRESHOLD-0: br label %loop.header
|
|
;
|
|
; Expected output for the second RUN line (default threshold): the entry
; block contains a load of %ptr.1, a compare of the loaded value against 100,
; and a conditional branch on that compare.
; THRESHOLD-DEFAULT-LABEL: @partial_unswitch_true_successor
|
|
; THRESHOLD-DEFAULT-NEXT: entry:
|
|
; THRESHOLD-DEFAULT-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4
|
|
; THRESHOLD-DEFAULT-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
|
|
; THRESHOLD-DEFAULT-NEXT: br i1 [[C]]
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
; Loop header: %iv starts at 0 and takes %iv.next on the back edge from
; %loop.latch. The branch condition is the value loaded from %ptr.1 compared
; for equality with 100.
loop.header:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
|
|
%lv = load i32, i32* %ptr.1
|
|
%sc = icmp eq i32 %lv, 100
|
|
br i1 %sc, label %noclobber, label %clobber
|
|
|
|
; True successor: stores the loaded value to element %iv of %ptr.2. Both
; pointer arguments are marked noalias in the function signature.
noclobber:
|
|
%gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv
|
|
store i32 %lv, i32* %gep.1
|
|
br label %loop.latch
|
|
|
|
; False successor: calls the external @clobber, whose declaration carries no
; attributes (presumably so it is treated as possibly modifying memory).
clobber:
|
|
call void @clobber()
|
|
br label %loop.latch
|
|
|
|
; Latch: the exit test compares the pre-increment %iv against %N (unsigned
; less-than); the loop continues while that compare is true.
loop.latch:
|
|
%c = icmp ult i32 %iv, %N
|
|
%iv.next = add i32 %iv, 1
|
|
br i1 %c, label %loop.header, label %exit
|
|
|
|
; The function always returns the constant 10.
exit:
|
|
ret i32 10
|
|
}
|