1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
Alexey Zhikhartsev 6516543c4b Add jump-threading optimization for deterministic finite automata
The current JumpThreading pass does not jump thread loops since it can
result in irreducible control flow that harms other optimizations. This
prevents switch statements inside a loop from being optimized to use
unconditional branches.

This code pattern occurs in the core_state_transition function of
Coremark. The state machine can be implemented manually with goto
statements resulting in a large runtime improvement, and this transform
makes the switch implementation match the goto version in performance.

This patch specifically targets switch statements inside a loop that
have the opportunity to be threaded. Once it identifies an opportunity,
it creates new paths that branch directly to the correct code block.
For example, the left CFG could be transformed to the right CFG:

```
          sw.bb                        sw.bb
        /   |   \                    /   |   \
   case1  case2  case3          case1  case2  case3
        \   |   /                /       |       \
        latch.bb             latch.2  latch.3  latch.1
         br sw.bb              /         |         \
                           sw.bb.2     sw.bb.3     sw.bb.1
                            br case2    br case3    br case1
```

Co-author: Justin Kreiner @jkreiner
Co-author: Ehsan Amiri @amehsan

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D99205
2021-07-27 14:34:04 -04:00

294 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dfa-jump-threading %s | FileCheck %s
; These tests check if selects are unfolded properly for jump threading
; opportunities. There are three different patterns to consider:
; 1) Both operands are constant and the false branch is unfolded by default
; 2) One operand is constant and the other is another select to be unfolded. In
; this case a single select is sunk to a new block to unfold.
; 3) Both operands are a select, and both should be sunk to new blocks.
define i32 @test1(i32 %num) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT2:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT1:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[COUNT1]], 50
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT1]], [[SI_UNFOLD_FALSE]] ], [ [[COUNT2]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[CASE1]] ], [ 2, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT1]], [[CASE2]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[CASE2]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp slt i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @test2(i32 %num) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt3:
; CHECK-NEXT: [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ]
; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT5:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_C1:%.*]] = icmp slt i32 [[COUNT5]], 50
; CHECK-NEXT: [[CMP2_C1:%.*]] = icmp slt i32 [[COUNT5]], 100
; CHECK-NEXT: br i1 [[CMP2_C1]], label [[SI_UNFOLD_TRUE:%.*]], label [[FOR_INC_JT3]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_C2:%.*]] = icmp slt i32 [[COUNT3]], 50
; CHECK-NEXT: [[CMP2_C2:%.*]] = icmp sgt i32 [[COUNT3]], 100
; CHECK-NEXT: br i1 [[CMP2_C2]], label [[FOR_INC_JT3]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br i1 [[CMP_C2]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE1:%.*]]
; CHECK: si.unfold.false1:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: si.unfold.true:
; CHECK-NEXT: br i1 [[CMP_C1]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE2:%.*]]
; CHECK: si.unfold.false2:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt3:
; CHECK-NEXT: [[COUNT6:%.*]] = phi i32 [ [[COUNT3]], [[CASE2]] ], [ [[COUNT5]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT3]] = phi i32 [ 3, [[CASE1]] ], [ 3, [[CASE2]] ]
; CHECK-NEXT: [[INC_JT3]] = add nsw i32 [[COUNT6]], 1
; CHECK-NEXT: [[CMP_EXIT_JT3:%.*]] = icmp slt i32 [[INC_JT3]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT3]], label [[FOR_BODY_JT3:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT7:%.*]] = phi i32 [ [[COUNT3]], [[SI_UNFOLD_FALSE1]] ], [ [[COUNT5]], [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[SI_UNFOLD_FALSE1]] ], [ 2, [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT7]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT3]], [[SI_UNFOLD_FALSE]] ], [ [[COUNT5]], [[SI_UNFOLD_TRUE]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[SI_UNFOLD_FALSE]] ], [ 1, [[SI_UNFOLD_TRUE]] ], [ 1, [[FOR_BODY_JT3]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
%cmp.c1 = icmp slt i32 %count, 50
%cmp2.c1 = icmp slt i32 %count, 100
%state1.1 = select i1 %cmp.c1, i32 1, i32 2
%state1.2 = select i1 %cmp2.c1, i32 %state1.1, i32 3
br label %for.inc
case2:
%cmp.c2 = icmp slt i32 %count, 50
%cmp2.c2 = icmp sgt i32 %count, 100
%state2.1 = select i1 %cmp.c2, i32 1, i32 2
%state2.2 = select i1 %cmp2.c2, i32 3, i32 %state2.1
br label %for.inc
for.inc:
%state.next = phi i32 [ %state1.2, %case1 ], [ %state2.2, %case2 ], [ 1, %for.body ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @test3(i32 %num) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt4:
; CHECK-NEXT: [[COUNT_JT4:%.*]] = phi i32 [ [[INC_JT4:%.*]], [[FOR_INC_JT4:%.*]] ]
; CHECK-NEXT: [[STATE_JT4:%.*]] = phi i32 [ [[STATE_NEXT_JT4:%.*]], [[FOR_INC_JT4]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt3:
; CHECK-NEXT: [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ]
; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT5:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[COUNT4]], 50
; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[COUNT4]], 100
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP_3]], label [[SI_UNFOLD_TRUE:%.*]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.true:
; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE2:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_INC_JT3]], label [[SI_UNFOLD_FALSE1:%.*]]
; CHECK: si.unfold.false1:
; CHECK-NEXT: br label [[FOR_INC_JT4]]
; CHECK: si.unfold.false2:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt4:
; CHECK-NEXT: [[STATE_NEXT_JT4]] = phi i32 [ 4, [[SI_UNFOLD_FALSE1]] ]
; CHECK-NEXT: [[INC_JT4]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT4:%.*]] = icmp slt i32 [[INC_JT4]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT4]], label [[FOR_BODY_JT4:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt3:
; CHECK-NEXT: [[STATE_NEXT_JT3]] = phi i32 [ 3, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT3]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT3:%.*]] = icmp slt i32 [[INC_JT3]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT3]], label [[FOR_BODY_JT3:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT6:%.*]] = phi i32 [ [[COUNT4]], [[SI_UNFOLD_FALSE2]] ], [ [[COUNT5]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[CASE1]] ], [ 2, [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT6]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT_JT4]], [[FOR_BODY_JT4]] ], [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT4]], [[SI_UNFOLD_TRUE]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[SI_UNFOLD_TRUE]] ], [ 1, [[FOR_BODY_JT3]] ], [ 1, [[FOR_BODY_JT4]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp.1 = icmp slt i32 %count, 50
%cmp.2 = icmp slt i32 %count, 100
%0 = and i32 %count, 1
%cmp.3 = icmp eq i32 %0, 0
%sel.1 = select i1 %cmp.1, i32 1, i32 2
%sel.2 = select i1 %cmp.2, i32 3, i32 4
%sel.3 = select i1 %cmp.3, i32 %sel.1, i32 %sel.2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel.3, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}