1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[MBP] Avoid tail duplication if it can't bring benefit

The tail duplication currently integrated into basic-block layout is designed to increase fallthrough from a BB's predecessor to its successor, but we have observed cases where duplication doesn't increase fallthrough, or where it brings too much size overhead.

To overcome these two issues, I add two checks to the function canTailDuplicateUnplacedPreds:

  Make sure there is at least one possible duplication in the current work set.
  The number of duplications should not exceed the number of successors.

The modification in hasBetterLayoutPredecessor fixes a bug: a potential predecessor must be at the bottom of its chain.

Differential Revision: https://reviews.llvm.org/D64376
This commit is contained in:
Guozhi Wei 2019-12-04 16:01:20 -08:00
parent ccd0b82a9d
commit 2be00d9e54
25 changed files with 944 additions and 655 deletions

View File

@ -1074,6 +1074,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (!shouldTailDuplicate(Succ))
return false;
// The result of canTailDuplicate.
bool Duplicate = true;
// Number of possible duplications.
unsigned int NumDup = 0;
// For CFG checking.
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
BB->succ_end());
@ -1120,9 +1125,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// to trellises created by tail-duplication, so we just look for the
// CFG.
continue;
return false;
Duplicate = false;
continue;
}
NumDup++;
}
// No possible duplication in current filter set.
if (NumDup == 0)
return false;
// This is mainly for function exit BB.
// The integrated tail duplication is really designed for increasing
// fallthrough from predecessors of Succ to its successors. We may need
// other mechanism to handle different cases.
if (Succ->succ_size() == 0)
return true;
// Plus the already placed predecessor.
NumDup++;
// If the duplication candidate has more unplaced predecessors than
// successors, the extra duplication can't bring more fallthrough.
//
// Pred1 Pred2 Pred3
// \ | /
// \ | /
// \ | /
// Dup
// / \
// / \
// Succ1 Succ2
//
// In this example Dup has 2 successors and 3 predecessors, duplication of Dup
// can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
// but the duplication into Pred3 can't increase fallthrough.
//
// A small number of extra duplications may not hurt too much. We need a better
// heuristic to handle it.
//
// FIXME: we should selectively tail duplicate a BB into part of its
// predecessors.
if ((NumDup > Succ->succ_size()) || !Duplicate)
return false;
return true;
}
@ -1418,9 +1464,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
BlockChain *PredChain = BlockToChain[Pred];
if (Pred == Succ || PredChain == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
BlockToChain[Pred] == &Chain ||
PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
// This check is redundant except for look ahead. This function is
// called for lookahead by isProfitableToTailDup when BB hasn't been
// placed yet.
@ -1722,7 +1769,9 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock* BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
if (allowTailDupPlacement())
ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
Chain,
BlockFilter));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at

View File

@ -73,7 +73,7 @@ define hidden i32 @test2() local_unnamed_addr {
; CHECK-LABEL: test3:
; CHECK-LABEL: $d.9:
; CHECK-LABEL: $x.10:
; CHECK-NEXT: b #20 <$x.12+0x8>
; CHECK-NEXT: b #-20 <test3+0x18>
; CHECK-LABEL: $x.12:
; CHECK-NEXT: b #4 <$x.12+0x4>
; CHECK-NEXT: mov w0, wzr

View File

@ -168,12 +168,12 @@ normal:
define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
; CHECK-APPLE-LABEL: foo_loop:
; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: fcmp
; CHECK-APPLE: b.gt
; CHECK-APPLE: cbz
; CHECK-APPLE: mov w0, #16
; CHECK-APPLE: malloc
; CHECK-APPLE: strb w{{.*}}, [x0, #8]
; CHECK-APPLE: fcmp
; CHECK-APPLE: b.le
; CHECK-APPLE: mov x21, x0
; CHECK-APPLE: ret

View File

@ -153,7 +153,7 @@ if.then2:
br i1 %tst3, label %if.then3, label %if.end
; CHECK: tst x0, x1, lsl #63
; CHECK: b.ge
; CHECK: b.lt
if.then3:
%shifted_op2 = shl i64 %val2, 62

View File

@ -9,9 +9,9 @@ name: loop_header_nopred
body: |
; GCN-LABEL: name: loop_header_nopred
; GCN: bb.0:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GCN: S_CBRANCH_VCCZ %bb.3, implicit $vcc
; GCN: S_BRANCH %bb.2
; GCN: successors: %bb.1(0x80000000)
; GCN: S_INST_PREFETCH 1
; GCN: S_BRANCH %bb.1
; GCN: bb.6 (align 64):
; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc

View File

@ -330,13 +330,14 @@ endif:
; GCN-LABEL: {{^}}divergent_inside_uniform:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
; GCN: [[ENDIF_LABEL:[0-9_A-Za-z]+]]:
; GCN: [[IF_LABEL]]:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: ; mask branch [[ENDIF_LABEL]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:

View File

@ -210,13 +210,12 @@ body: |
; CHECK: bb.5.if.else.i:
; CHECK: successors: %bb.11(0x80000000)
; CHECK: B %bb.11
; CHECK: bb.8.while.body.i (align 16):
; CHECK: successors: %bb.11(0x04000000), %bb.9(0x7c000000)
; CHECK: BCC 76, killed renamable $cr0, %bb.11
; CHECK: bb.11:
; CHECK: renamable $x3 = LI8 1
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
; CHECK: bb.8.while.body.i (align 16):
; CHECK: successors: %bb.11(0x04000000), %bb.9(0x7c000000)
; CHECK: BCC 76, killed renamable $cr0, %bb.11
...

View File

@ -8,13 +8,11 @@ target triple = "powerpc-unknown-linux-gnu"
; The last (whichever it is) should have a fallthrough exit, and the other three
; need an unconditional branch. No other block should have an unconditional
; branch to cond_next48
; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
; of %cond_next48, so there should only be two unconditional branches.
;CHECK: b .LBB0_13
;CHECK: b .LBB0_13
;CHECK-NOT: b .LBB0_13
;CHECK: .LBB0_13: # %cond_next48
;CHECK: .LBB0_7: # %cond_next48
;CHECK: b .LBB0_7
;CHECK: b .LBB0_7
;CHECK: b .LBB0_7
define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
entry:

View File

@ -137,6 +137,7 @@ _ZNK4llvm9StringRef6substrEmm.exit:
; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]]
; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]]
; CHECK-NEXT: [[TRUE]]
; CHECK-NEXT: # in Loop: Header
; CHECK-NEXT: addi {{r[0-9]+}}, {{r[0-9]+}}, 0
; CHECK-NEXT: [[SUCCESSOR]]
}

View File

@ -0,0 +1,91 @@
; RUN: llc -O2 < %s | FileCheck %s
target triple = "powerpc64le-grtev4-linux-gnu"
; No duplication of loop header into entry block.
;
; %header has two predecessors (%entry and %latch) and two successors
; (%middle and %latch). The expected output below requires an unconditional
; branch to the loop header label right after the register copy, and requires
; the loop header itself to start with the compare against 100 followed by its
; conditional branch, i.e. the header's compare-and-branch stays in the header.
define void @no_duplicate1(i64 %a) {
; CHECK-LABEL: no_duplicate1
; CHECK: mr 30, 3
; CHECK-NEXT: b .LBB0_2
; CHECK: .LBB0_2:
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpldi 30, 100
; CHECK-NEXT: bne 0, .LBB0_1
; Entry block: unconditionally enters the loop.
entry:
br label %header
; Loop header: %ind comes from %a on entry and from %val3 on the back edge.
; Goes to %middle when %ind == 100, otherwise directly to %latch.
header:
%ind = phi i64 [%a, %entry], [%val3, %latch]
%cond1 = icmp eq i64 %ind, 100
br i1 %cond1, label %middle, label %latch
; Reached only from %header; calls @foo (result unused), computes %ind ^ 2 and
; always continues to %latch.
middle:
%condx = call i1 @foo()
%val1 = xor i64 %ind, 2
br label %latch
; Loop latch: merges the value from %header or %middle, adds 1, then leaves the
; loop when @foo() returns true and branches back to %header otherwise.
latch:
%val2 = phi i64 [%ind, %header], [%val1, %middle]
%val3 = add i64 %val2, 1
%cond2 = call i1 @foo()
br i1 %cond2, label %end, label %header
; Function exit.
end:
ret void
}
; No duplication of loop header into latches.
;
; %header has four predecessors (%entry, %latch1, %latch2 and %latch3) but only
; two successors (%middle1 and %latch1). The expected output below requires the
; loop header to keep its compare against 100 and conditional branch, and
; requires an unconditional branch back to the header label after both %latch2
; and %latch3.
define void @no_duplicate2(i64 %a) {
; CHECK-LABEL: no_duplicate2
; CHECK: mr 30, 3
; CHECK-NEXT: b .LBB1_2
; CHECK: .LBB1_2:
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpldi 30, 100
; CHECK-NEXT: bne 0, .LBB1_1
; CHECK: %latch2
; CHECK: b .LBB1_2
; CHECK: %latch3
; CHECK: b .LBB1_2
; Entry block: unconditionally enters the loop.
entry:
br label %header
; Loop header: %ind comes from %a on entry, %val1 from %latch1, and %val2 from
; either %latch2 or %latch3. Goes to %middle1 when %ind == 100, else %latch1.
header:
%ind = phi i64 [%a, %entry], [%val1, %latch1], [%val2, %latch2], [%val2, %latch3]
%cond1 = icmp eq i64 %ind, 100
br i1 %cond1, label %middle1, label %latch1
; First latch: reached from %header or %middle1. Leaves the loop when @foo()
; returns true, otherwise branches back to %header with %ind ^ 2.
latch1:
%cond2 = call i1 @foo()
%val1 = xor i64 %ind, 2
br i1 %cond2, label %end, label %header
; Chooses between %latch1 and %middle2 based on @foo().
middle1:
%cond3 = call i1 @foo()
br i1 %cond3, label %latch1, label %middle2
; Computes %ind + 1 and chooses between %latch2 and %latch3 based on @foo().
middle2:
%cond4 = call i1 @foo()
%val2 = add i64 %ind, 1
br i1 %cond4, label %latch2, label %latch3
; Second latch: calls @a and unconditionally branches back to %header.
latch2:
call void @a()
br label %header
; Third latch: calls @b and unconditionally branches back to %header.
latch3:
call void @b()
br label %header
; Function exit.
end:
ret void
}
declare i1 @foo()
declare void @a()
declare void @b()

File diff suppressed because it is too large Load Diff

View File

@ -52,20 +52,19 @@ define i32 @test() nounwind {
; RV32I-NEXT: lui s9, %hi(h)
; RV32I-NEXT: lui s10, %hi(c)
; RV32I-NEXT: lui s11, %hi(b)
; RV32I-NEXT: lw a1, %lo(l)(s2)
; RV32I-NEXT: bnez a1, .LBB0_4
; RV32I-NEXT: j .LBB0_5
; RV32I-NEXT: j .LBB0_3
; RV32I-NEXT: .LBB0_2: # %for.inc
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(a)(s6)
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: sw a0, %lo(a)(s6)
; RV32I-NEXT: beqz a0, .LBB0_11
; RV32I-NEXT: # %bb.3: # %for.body
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: .LBB0_3: # %for.body
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: lw a1, %lo(l)(s2)
; RV32I-NEXT: beqz a1, .LBB0_5
; RV32I-NEXT: .LBB0_4: # %if.then
; RV32I-NEXT: # %bb.4: # %if.then
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a1, %lo(b)(s11)
; RV32I-NEXT: lw a2, %lo(c)(s10)
; RV32I-NEXT: lw a3, %lo(d)(s1)
@ -73,11 +72,11 @@ define i32 @test() nounwind {
; RV32I-NEXT: addi a5, zero, 32
; RV32I-NEXT: call foo
; RV32I-NEXT: .LBB0_5: # %if.end
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(k)(s3)
; RV32I-NEXT: beqz a0, .LBB0_7
; RV32I-NEXT: # %bb.6: # %if.then3
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(b)(s11)
; RV32I-NEXT: lw a1, %lo(c)(s10)
; RV32I-NEXT: lw a2, %lo(d)(s1)
@ -86,11 +85,11 @@ define i32 @test() nounwind {
; RV32I-NEXT: addi a5, zero, 64
; RV32I-NEXT: call foo
; RV32I-NEXT: .LBB0_7: # %if.end5
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(j)(s4)
; RV32I-NEXT: beqz a0, .LBB0_9
; RV32I-NEXT: # %bb.8: # %if.then7
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(c)(s10)
; RV32I-NEXT: lw a1, %lo(d)(s1)
; RV32I-NEXT: lw a2, %lo(e)(s0)
@ -99,11 +98,11 @@ define i32 @test() nounwind {
; RV32I-NEXT: addi a5, zero, 32
; RV32I-NEXT: call foo
; RV32I-NEXT: .LBB0_9: # %if.end9
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(i)(s5)
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.10: # %if.then11
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
; RV32I-NEXT: lw a0, %lo(d)(s1)
; RV32I-NEXT: lw a1, %lo(e)(s0)
; RV32I-NEXT: lw a2, %lo(f)(s7)

View File

@ -31,9 +31,14 @@ return: ; preds = %bb, %entry
define i32 @test_dead_cycle(i32 %n) nounwind {
; CHECK-LABEL: test_dead_cycle:
; CHECK: subs
; also check for duplicate induction variables (radar 7645034)
; CHECK: subs r{{.*}}, #1
; CHECK-NOT: subs r{{.*}}, #1
; CHECK: bl
; CHECK-NOT: mov
; CHECK: bl
; CHECK: pop
entry:
%0 = icmp eq i32 %n, 1 ; <i1> [#uses=1]
br i1 %0, label %return, label %bb.nph
@ -58,10 +63,6 @@ bb1: ; preds = %bb
br label %bb2
bb2: ; preds = %bb1, %bb
; also check for duplicate induction variables (radar 7645034)
; CHECK: subs r{{.*}}, #1
; CHECK-NOT: subs r{{.*}}, #1
; CHECK: pop
%u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; <i64> [#uses=2]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1]

View File

@ -1532,7 +1532,7 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: b .LBB9_6
; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: b .LBB9_9
; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: movs r3, #1
@ -1693,7 +1693,7 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: b .LBB10_6
; CHECK-NEXT: .LBB10_3:
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: b .LBB10_9
; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: movs r3, #1
@ -1854,7 +1854,7 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: b .LBB11_6
; CHECK-NEXT: .LBB11_3:
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: b .LBB11_9
; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new
; CHECK-NEXT: subs r2, r2, r7
; CHECK-NEXT: movs r3, #1

View File

@ -5,7 +5,7 @@ declare void @y()
define void @f(i32 %x, i32 %y) {
; CHECK-LABEL: f:
; CHECK: cbnz
; CHECK: cbz
%p = icmp eq i32 %x, 0
br i1 %p, label %t, label %f
@ -26,7 +26,7 @@ t:
call void @x()
call void @x()
call void @x()
; CHECK: cbz
; CHECK: bne
%q = icmp eq i32 %y, 0
br i1 %q, label %t2, label %f

View File

@ -16,17 +16,14 @@ define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) {
; CHECK-NEXT: # %bb.2: # %if.B
; CHECK-NEXT: pshufw $238, %mm0, %mm0 # mm0 = mm0[2,3,2,3]
; CHECK-NEXT: movq %mm0, %rax
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: jmp .LBB0_3
; CHECK-NEXT: .LBB0_1: # %if.A
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movd %edx, %mm1
; CHECK-NEXT: psllq %mm1, %mm0
; CHECK-NEXT: movq %mm0, %rax
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: # %bb.3: # %if.C
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: .LBB0_3: # %if.C
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: .LBB0_4: # %merge

View File

@ -70,8 +70,15 @@ define dso_local void @fn() {
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: movb %dh, %dl
; CHECK-NEXT: jne .LBB0_16
; CHECK-NEXT: jmp .LBB0_6
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: # %bb.6: # %for.cond35
; CHECK-NEXT: .LBB0_3: # %if.then
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $.str, (%esp)
; CHECK-NEXT: calll printf
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
; CHECK-NEXT: # implicit-def: $eax
; CHECK-NEXT: .LBB0_6: # %for.cond35
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB0_7
@ -96,22 +103,10 @@ define dso_local void @fn() {
; CHECK-NEXT: # implicit-def: $ebp
; CHECK-NEXT: jmp .LBB0_20
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_3: # %if.then
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $.str, (%esp)
; CHECK-NEXT: calll printf
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
; CHECK-NEXT: # implicit-def: $eax
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jne .LBB0_11
; CHECK-NEXT: jmp .LBB0_7
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_8: # %if.end21
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: # implicit-def: $ebp
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: je .LBB0_13
; CHECK-NEXT: jmp .LBB0_10
; CHECK-NEXT: jmp .LBB0_9
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: xorl %edi, %edi
@ -127,11 +122,11 @@ define dso_local void @fn() {
; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne .LBB0_20
; CHECK-NEXT: # %bb.9: # %ae
; CHECK-NEXT: .LBB0_9: # %ae
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne .LBB0_10
; CHECK-NEXT: .LBB0_13: # %if.end26
; CHECK-NEXT: # %bb.13: # %if.end26
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb %dl, %dl

View File

@ -114,7 +114,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jle LBB0_22
; CHECK-NEXT: LBB0_13: ## %while.body200
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_30 Depth 2
; CHECK-NEXT: ## Child Loop BB0_29 Depth 2
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
; CHECK-NEXT: leal -268(%rbp), %eax
; CHECK-NEXT: cmpl $105, %eax
@ -160,27 +160,25 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ## implicit-def: $rax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jns LBB0_30
; CHECK-NEXT: jmp LBB0_55
; CHECK-NEXT: jmp LBB0_29
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: leaq 1(%r14), %rax
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: je LBB0_33
; CHECK-NEXT: ## %bb.29: ## %land.rhs485
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: js LBB0_55
; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780
; CHECK-NEXT: LBB0_29: ## %land.rhs485
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: js LBB0_55
; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_32
; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
; CHECK-NEXT: callq ___maskrune
; CHECK-NEXT: testb %bl, %bl

View File

@ -36,24 +36,24 @@ define i32 @test_branches_order() uwtable ssp {
; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %r14
; CHECK-NEXT: movq %rsp, %r15
; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7
; CHECK-NEXT: jle LBB0_2
; CHECK-NEXT: jmp LBB0_7
; CHECK-NEXT: jmp LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_6: ## %for.inc9
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: incl %r12d
; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7
; CHECK-NEXT: jg LBB0_7
; CHECK-NEXT: LBB0_2: ## %for.cond1.preheader
; CHECK-NEXT: LBB0_1: ## %for.cond
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_3 Depth 2
; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7
; CHECK-NEXT: jg LBB0_7
; CHECK-NEXT: ## %bb.2: ## %for.cond1.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $-1, %r13d
; CHECK-NEXT: movq %r15, %rbx
; CHECK-NEXT: movq %r14, %rbp
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_3: ## %for.cond1
; CHECK-NEXT: ## Parent Loop BB0_2 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_1 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: incl %r13d
; CHECK-NEXT: cmpl $999, %r13d ## imm = 0x3E7
@ -74,47 +74,45 @@ define i32 @test_branches_order() uwtable ssp {
; CHECK-NEXT: callq _puts
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movq %rsp, %rcx
; CHECK-NEXT: cmpl $999, %eax ## imm = 0x3E7
; CHECK-NEXT: jle LBB0_9
; CHECK-NEXT: jmp LBB0_16
; CHECK-NEXT: jmp LBB0_8
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_15: ## %for.inc38
; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: incl %eax
; CHECK-NEXT: LBB0_8: ## %for.cond14
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_10 Depth 2
; CHECK-NEXT: ## Child Loop BB0_12 Depth 3
; CHECK-NEXT: cmpl $999, %eax ## imm = 0x3E7
; CHECK-NEXT: jg LBB0_16
; CHECK-NEXT: LBB0_9: ## %for.cond18.preheader
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_11 Depth 2
; CHECK-NEXT: ## Child Loop BB0_12 Depth 3
; CHECK-NEXT: ## %bb.9: ## %for.cond18.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
; CHECK-NEXT: jle LBB0_11
; CHECK-NEXT: jmp LBB0_15
; CHECK-NEXT: jmp LBB0_10
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_14: ## %exit
; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2
; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2
; CHECK-NEXT: addq %rsi, %rbp
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: decq %rsi
; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9
; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18
; CHECK-NEXT: jne LBB0_5
; CHECK-NEXT: ## %bb.10: ## %for.cond18
; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
; CHECK-NEXT: jg LBB0_15
; CHECK-NEXT: LBB0_11: ## %for.body20
; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
; CHECK-NEXT: LBB0_10: ## %for.cond18
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Loop Header: Depth=2
; CHECK-NEXT: ## Child Loop BB0_12 Depth 3
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
; CHECK-NEXT: jg LBB0_15
; CHECK-NEXT: ## %bb.11: ## %for.body20
; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2
; CHECK-NEXT: movq $-1000, %rbp ## imm = 0xFC18
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_12: ## %do.body.i
; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_11 Depth=2
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_10 Depth=2
; CHECK-NEXT: ## => This Inner Loop Header: Depth=3
; CHECK-NEXT: cmpb $120, 1000(%rdx,%rbp)
; CHECK-NEXT: je LBB0_14

View File

@ -141,15 +141,14 @@ define i32 @foo(i32 %i) local_unnamed_addr {
; X86_64-NEXT: #EH_SjLj_Setup LBB1_4
; X86_64-NEXT: ## %bb.1: ## %entry
; X86_64-NEXT: xorl %eax, %eax
; X86_64-NEXT: testl %eax, %eax
; X86_64-NEXT: jne LBB1_3
; X86_64-NEXT: jmp LBB1_5
; X86_64-NEXT: jmp LBB1_2
; X86_64-NEXT: LBB1_4: ## Block address taken
; X86_64-NEXT: ## %entry
; X86_64-NEXT: movl $1, %eax
; X86_64-NEXT: LBB1_2: ## %entry
; X86_64-NEXT: testl %eax, %eax
; X86_64-NEXT: je LBB1_5
; X86_64-NEXT: LBB1_3: ## %if.end
; X86_64-NEXT: ## %bb.3: ## %if.end
; X86_64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
; X86_64-NEXT: shll $2, %eax
; X86_64-NEXT: leal (%rax,%rax,2), %eax
@ -190,15 +189,14 @@ define i32 @foo(i32 %i) local_unnamed_addr {
; X86-NEXT: #EH_SjLj_Setup LBB1_4
; X86-NEXT: ## %bb.1: ## %entry
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne LBB1_3
; X86-NEXT: jmp LBB1_5
; X86-NEXT: jmp LBB1_2
; X86-NEXT: LBB1_4: ## Block address taken
; X86-NEXT: ## %entry
; X86-NEXT: movl $1, %eax
; X86-NEXT: LBB1_2: ## %entry
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je LBB1_5
; X86-NEXT: LBB1_3: ## %if.end
; X86-NEXT: ## %bb.3: ## %if.end
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: shll $2, %eax
; X86-NEXT: leal (%eax,%eax,2), %eax

View File

@ -411,8 +411,18 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
; X64-LFENCE-NEXT: pushq %rbx
; X64-LFENCE-NEXT: pushq %rax
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: jne .LBB3_6
; X64-LFENCE-NEXT: # %bb.1: # %l1.header.preheader
; X64-LFENCE-NEXT: je .LBB3_1
; X64-LFENCE-NEXT: .LBB3_6: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r13
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: popq %rbp
; X64-LFENCE-NEXT: retq
; X64-LFENCE-NEXT: .LBB3_1: # %l1.header.preheader
; X64-LFENCE-NEXT: movq %r8, %r14
; X64-LFENCE-NEXT: movq %rcx, %rbx
; X64-LFENCE-NEXT: movl %edx, %r13d
@ -452,16 +462,6 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
; X64-LFENCE-NEXT: cmpl %r13d, %ebp
; X64-LFENCE-NEXT: jl .LBB3_4
; X64-LFENCE-NEXT: jmp .LBB3_5
; X64-LFENCE-NEXT: .LBB3_6: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
; X64-LFENCE-NEXT: popq %rbx
; X64-LFENCE-NEXT: popq %r12
; X64-LFENCE-NEXT: popq %r13
; X64-LFENCE-NEXT: popq %r14
; X64-LFENCE-NEXT: popq %r15
; X64-LFENCE-NEXT: popq %rbp
; X64-LFENCE-NEXT: retq
entry:
%a.cmp = icmp eq i32 %a, 0
br i1 %a.cmp, label %l1.header, label %exit

View File

@ -5,22 +5,23 @@
define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 {
; CHECK-LABEL: tail_dup_merge_loops:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: jmp .LBB0_5
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_3: # %inner_loop_exit
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: incq %rsi
; CHECK-NEXT: .LBB0_1: # %outer_loop_top
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %inner_loop_top
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-NEXT: # %bb.2: # %inner_loop_top
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmpb $0, (%rsi)
; CHECK-NEXT: js .LBB0_3
; CHECK-NEXT: .LBB0_4: # %inner_loop_latch
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: addq $2, %rsi
; CHECK-NEXT: cmpb $0, (%rsi)
@ -97,7 +98,7 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_26
; CHECK-NEXT: jne .LBB1_27
; CHECK-NEXT: # %bb.1: # %if.end19
; CHECK-NEXT: movl %esi, %r13d
; CHECK-NEXT: movq %rdi, %r12
@ -108,71 +109,30 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: movq %r15, %rdi
; CHECK-NEXT: callq cli_calloc
; CHECK-NEXT: testl %r13d, %r13d
; CHECK-NEXT: je .LBB1_25
; CHECK-NEXT: je .LBB1_26
; CHECK-NEXT: # %bb.2: # %if.end19
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: je .LBB1_25
; CHECK-NEXT: je .LBB1_26
; CHECK-NEXT: # %bb.3: # %if.end19
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_25
; CHECK-NEXT: jne .LBB1_26
; CHECK-NEXT: # %bb.4: # %if.end19
; CHECK-NEXT: cmpq %r12, %rbx
; CHECK-NEXT: jb .LBB1_25
; CHECK-NEXT: jb .LBB1_26
; CHECK-NEXT: # %bb.5: # %if.end50
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: movq %r15, %rdx
; CHECK-NEXT: callq memcpy
; CHECK-NEXT: cmpl $4, %r14d
; CHECK-NEXT: jb .LBB1_28
; CHECK-NEXT: jb .LBB1_29
; CHECK-NEXT: # %bb.6: # %shared_preheader
; CHECK-NEXT: movb $32, %dl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # implicit-def: $rcx
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: jmp .LBB1_9
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_9: # %shared_loop_header
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testq %rbx, %rbx
; CHECK-NEXT: jne .LBB1_27
; CHECK-NEXT: # %bb.10: # %inner_loop_body
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jns .LBB1_9
; CHECK-NEXT: # %bb.11: # %if.end96.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: cmpl $3, %ebp
; CHECK-NEXT: jae .LBB1_22
; CHECK-NEXT: # %bb.12: # %if.end287.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: cmpl $1, %ebp
; CHECK-NEXT: setne %dl
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_16
; CHECK-NEXT: # %bb.13: # %if.end308.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB1_7
; CHECK-NEXT: # %bb.14: # %if.end335.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: movl $0, %esi
; CHECK-NEXT: jne .LBB1_8
; CHECK-NEXT: # %bb.15: # %merge_other
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp .LBB1_17
; CHECK-NEXT: .LBB1_16: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: movb %dl, %sil
; CHECK-NEXT: addl $3, %esi
; CHECK-NEXT: .LBB1_17: # %outer_loop_latch
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: jmp .LBB1_8
; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: movb $32, %dl
@ -183,26 +143,72 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: decl %esi
; CHECK-NEXT: movzwl %si, %esi
; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx
; CHECK-NEXT: .LBB1_9: # %outer_loop_header
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_10 Depth 2
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: jne .LBB1_9
; CHECK-NEXT: .LBB1_18: # %while.cond.us1412.i
; CHECK-NEXT: je .LBB1_19
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_10: # %shared_loop_header
; CHECK-NEXT: # Parent Loop BB1_9 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: testq %rbx, %rbx
; CHECK-NEXT: jne .LBB1_28
; CHECK-NEXT: # %bb.11: # %inner_loop_body
; CHECK-NEXT: # in Loop: Header=BB1_10 Depth=2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jns .LBB1_10
; CHECK-NEXT: # %bb.12: # %if.end96.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: cmpl $3, %ebp
; CHECK-NEXT: jae .LBB1_23
; CHECK-NEXT: # %bb.13: # %if.end287.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: cmpl $1, %ebp
; CHECK-NEXT: setne %dl
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_17
; CHECK-NEXT: # %bb.14: # %if.end308.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB1_7
; CHECK-NEXT: # %bb.15: # %if.end335.i
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: movl $0, %esi
; CHECK-NEXT: jne .LBB1_8
; CHECK-NEXT: # %bb.16: # %merge_other
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp .LBB1_18
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_17: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: movb %dl, %sil
; CHECK-NEXT: addl $3, %esi
; CHECK-NEXT: .LBB1_18: # %outer_loop_latch
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: jmp .LBB1_8
; CHECK-NEXT: .LBB1_26:
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: jmp .LBB1_27
; CHECK-NEXT: .LBB1_19: # %while.cond.us1412.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: jne .LBB1_20
; CHECK-NEXT: # %bb.19: # %while.cond.us1412.i
; CHECK-NEXT: jne .LBB1_21
; CHECK-NEXT: # %bb.20: # %while.cond.us1412.i
; CHECK-NEXT: decb %dl
; CHECK-NEXT: jne .LBB1_26
; CHECK-NEXT: .LBB1_20: # %if.end41.us1436.i
; CHECK-NEXT: .LBB1_25:
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: jmp .LBB1_26
; CHECK-NEXT: .LBB1_22: # %if.then99.i
; CHECK-NEXT: jne .LBB1_27
; CHECK-NEXT: .LBB1_21: # %if.end41.us1436.i
; CHECK-NEXT: .LBB1_23: # %if.then99.i
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movl $.str.6, %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: callq cli_dbgmsg
; CHECK-NEXT: .LBB1_26: # %cleanup
; CHECK-NEXT: .LBB1_27: # %cleanup
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
@ -212,8 +218,8 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_27: # %wunpsect.exit.thread.loopexit389
; CHECK-NEXT: .LBB1_28: # %wunpsect.exit.thread.loopexit391
; CHECK-NEXT: .LBB1_28: # %wunpsect.exit.thread.loopexit389
; CHECK-NEXT: .LBB1_29: # %wunpsect.exit.thread.loopexit391
entry:
%0 = load i32, i32* undef, align 4
%mul = shl nsw i32 %0, 2

View File

@ -10,28 +10,33 @@
define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 {
; CHECK-LABEL: repeated_tail_dup:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %land.lhs.true
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $10, (%rdx)
; CHECK-NEXT: .LBB0_6: # %dup2
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $2, (%rcx)
; CHECK-NEXT: testl %r9d, %r9d
; CHECK-NEXT: jne .LBB0_8
; CHECK-NEXT: .LBB0_1: # %for.cond
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: .LBB0_3: # %if.end56
; CHECK-NEXT: # %bb.3: # %if.end56
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: # %bb.4: # %if.then64
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movb $1, (%r8)
; CHECK-NEXT: testl %r9d, %r9d
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: jmp .LBB0_8
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_5: # %if.end70
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $12, (%rdx)
; CHECK-NEXT: jmp .LBB0_6
; CHECK-NEXT: .LBB0_8: # %for.end

View File

@ -18,30 +18,30 @@ body: |
; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
; CHECK: JCC_1 %bb.1, 4, implicit $eflags
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x30000000), %bb.8(0x50000000)
; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000)
; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8)
; CHECK: JCC_1 %bb.8, 5, implicit $eflags
; CHECK: JCC_1 %bb.6, 4, implicit $eflags
; CHECK: bb.4:
; CHECK: successors: %bb.1(0x30000000), %bb.5(0x50000000)
; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags
; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al
; CHECK: RETQ $eax
; CHECK: bb.6:
; CHECK: successors: %bb.1(0x30000000), %bb.7(0x50000000)
; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8)
; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
; CHECK: JCC_1 %bb.1, 4, implicit $eflags
; CHECK: bb.5 (align 16):
; CHECK: successors: %bb.6(0x71555555), %bb.8(0x0eaaaaab)
; CHECK: bb.7 (align 16):
; CHECK: successors: %bb.8(0x71555555), %bb.4(0x0eaaaaab)
; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8)
; CHECK: JCC_1 %bb.8, 5, implicit $eflags
; CHECK: bb.6:
; CHECK: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
; CHECK: JCC_1 %bb.4, 5, implicit $eflags
; CHECK: bb.8:
; CHECK: successors: %bb.1(0x04000000), %bb.7(0x7c000000)
; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8)
; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
; CHECK: JCC_1 %bb.5, 5, implicit $eflags
; CHECK: JCC_1 %bb.7, 5, implicit $eflags
; CHECK: bb.1:
; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags
; CHECK: RETQ $eax
; CHECK: bb.8:
; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags
; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al
; CHECK: RETQ $eax
bb.0:
successors: %bb.1(0x40000000), %bb.7(0x40000000)

View File

@ -242,75 +242,68 @@ define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_17
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movb 0, %bl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_16
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.2: # %bb.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: je .LBB3_8
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
; CHECK-NEXT: movq 0, %rax
; CHECK-NEXT: movzbl (%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_12
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_5
; CHECK-NEXT: # %bb.6: # %bb.i1
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.5: # %bb.i1
; CHECK-NEXT: movq 32(%rax), %rax
; CHECK-NEXT: movzbl 16(%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.7: # %bb.i1
; CHECK-NEXT: je .LBB3_13
; CHECK-NEXT: # %bb.6: # %bb.i1
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.9: # %bb.i.i
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.7: # %bb.i.i
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq lvalue_p
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_15
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_16: # %bb1
; CHECK-NEXT: jmp .LBB3_16
; CHECK-NEXT: .LBB3_8: # %bb1
; CHECK-NEXT: cmpb $23, %bl
; CHECK-NEXT: .LBB3_17: # %bb3
; CHECK-NEXT: .LBB3_12: # %bb2.i3
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: # %bb.13: # %bb2.i3
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: .LBB3_9: # %bb3
; CHECK-NEXT: .LBB3_15:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_15
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_10: # %bb2.i.i2
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: # %bb.11: # %bb2.i.i2
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_17
; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: .LBB3_10: # %bb2.i3
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.11: # %bb2.i3
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
; CHECK-NEXT: .LBB3_13: # %bb2.i.i2
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.14: # %bb2.i.i2
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
entry:
%tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
switch i8 %tmp4, label %bb3 [