; Source: mirror of https://github.com/RPCS3/llvm-mirror.git
; (synced 2024-11-23 19:23:23 +01:00), ae6be9df60
; Commit message: Some code gen passes use MBFIWrapper to keep track of the
; frequency of new blocks. This was not taken into account and could lead to
; incorrect frequencies, as MBFI silently returns zero frequency for
; unknown/new blocks. Add a variant for MBFIWrapper in the PGSO query
; interface. Depends on D73494.
; 931 lines, 26 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s

; External functions called by the tests in this file; none of them is
; defined here.
declare void @bar(i32)
declare void @car(i32)
declare void @dar(i32)
declare void @ear(i32)
declare void @far(i32)
declare i1 @qux()

; @GHJK is stored to by tail_merge_me and tail_duplicate_me; @HABC is stored
; to by tail_merge_me.
@GHJK = global i32 0
@HABC = global i32 0
|
|
|
|
; BranchFolding should tail-merge the stores since they all precede
; direct branches to the same place.
;
; Blocks A, B and C each end with "store i32 0, i32* @GHJK" followed by a
; branch to M. The CHECK lines expect a single "movl $0" at the top of M
; instead of one copy per predecessor.

define void @tail_merge_me() nounwind {
; CHECK-LABEL: tail_merge_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.6: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_1: # %next
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: .LBB0_4: # %M
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: # %bb.7: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_5: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
entry:
  %a = call i1 @qux()
  br i1 %a, label %A, label %next
next:
  %b = call i1 @qux()
  br i1 %b, label %B, label %C

; A, B and C differ only in the call; the store and the branch are identical.
A:
  call void @bar(i32 0)
  store i32 0, i32* @GHJK
  br label %M

B:
  call void @car(i32 1)
  store i32 0, i32* @GHJK
  br label %M

C:
  call void @dar(i32 2)
  store i32 0, i32* @GHJK
  br label %M

; Common successor of A, B and C.
M:
  store i32 1, i32* @HABC
  %c = call i1 @qux()
  br i1 %c, label %return, label %altret

return:
  call void @ear(i32 1000)
  ret void
altret:
  call void @far(i32 1001)
  ret void
}
|
|
|
|
; External helper; tail_duplicate_me passes it the addresses of its %return
; and %altret blocks and branches indirectly on the returned pointer.
declare i8* @choose(i8*, i8*)

; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
;
; M contains only the indirectbr. The CHECK lines expect "jmpq *%r14" at the
; end of each of A, B and C rather than a jump to a shared block.

define void @tail_duplicate_me() nounwind {
; CHECK-LABEL: tail_duplicate_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux
; CHECK-NEXT: movl $.Ltmp0, %edi
; CHECK-NEXT: movl $.Ltmp1, %esi
; CHECK-NEXT: movl %eax, %ebx
; CHECK-NEXT: callq choose
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # %bb.7: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp0: # Block address taken
; CHECK-NEXT: .LBB1_4: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: jmp .LBB1_5
; CHECK-NEXT: .LBB1_1: # %next
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB1_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp1: # Block address taken
; CHECK-NEXT: .LBB1_6: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: .LBB1_5: # %return
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
entry:
  %a = call i1 @qux()
  ; %c is the indirect branch target used in M.
  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
                        i8* blockaddress(@tail_duplicate_me, %altret))
  br i1 %a, label %A, label %next
next:
  %b = call i1 @qux()
  br i1 %b, label %B, label %C

A:
  call void @bar(i32 0)
  store i32 0, i32* @GHJK
  br label %M

B:
  call void @car(i32 1)
  store i32 0, i32* @GHJK
  br label %M

C:
  call void @dar(i32 2)
  store i32 0, i32* @GHJK
  br label %M

; Shared block holding only the indirect jump.
M:
  indirectbr i8* %c, [label %return, label %altret]

return:
  call void @ear(i32 1000)
  ret void
altret:
  call void @far(i32 1001)
  ret void
}
|
|
|
|
; BranchFolding shouldn't try to merge the tails of two blocks
; with only a branch in common, regardless of the fallthrough situation.
;
; %bb and %bb21 both end in a conditional branch to %bb26/%bb30 but compare
; different values; the CHECK lines keep a separate ucomiss + branch for each.

define i1 @dont_merge_oddly(float* %result) nounwind {
; CHECK-LABEL: dont_merge_oddly:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm1, %xmm2
; CHECK-NEXT: jbe .LBB2_3
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: ja .LBB2_4
; CHECK-NEXT: .LBB2_2: # %bb30
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB2_3: # %bb21
; CHECK-NEXT: ucomiss %xmm0, %xmm2
; CHECK-NEXT: jbe .LBB2_2
; CHECK-NEXT: .LBB2_4: # %bb26
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
entry:
  ; Load the floats at element indices 2, 4 and 6 of %result.
  %tmp4 = getelementptr float, float* %result, i32 2
  %tmp5 = load float, float* %tmp4, align 4
  %tmp7 = getelementptr float, float* %result, i32 4
  %tmp8 = load float, float* %tmp7, align 4
  %tmp10 = getelementptr float, float* %result, i32 6
  %tmp11 = load float, float* %tmp10, align 4
  %tmp12 = fcmp olt float %tmp8, %tmp11
  br i1 %tmp12, label %bb, label %bb21

bb:
  %tmp23469 = fcmp olt float %tmp5, %tmp8
  br i1 %tmp23469, label %bb26, label %bb30

bb21:
  %tmp23 = fcmp olt float %tmp5, %tmp11
  br i1 %tmp23, label %bb26, label %bb30

bb26:
  ret i1 0

bb30:
  ret i1 1
}
|
|
|
|
; Do any-size tail-merging when two candidate blocks will both require
; an unconditional jump to complete a two-way conditional branch.
;
; This test only works when register allocation happens to use %rax for both
; load addresses.

%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
%union..2anon = type { i32 }
%union.rtunion = type { i8* }
%union.tree_node = type { %struct.tree_decl }

define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
; CHECK-LABEL: c_expand_expr_stmt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movb 0, %bl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.2: # %bb.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_8
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
; CHECK-NEXT: movq 0, %rax
; CHECK-NEXT: movzbl (%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.5: # %bb.i1
; CHECK-NEXT: movq 32(%rax), %rax
; CHECK-NEXT: movzbl 16(%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_13
; CHECK-NEXT: # %bb.6: # %bb.i1
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_15
; CHECK-NEXT: # %bb.7: # %bb.i.i
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq lvalue_p
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: jmp .LBB3_16
; CHECK-NEXT: .LBB3_8: # %bb1
; CHECK-NEXT: cmpb $23, %bl
; CHECK-NEXT: .LBB3_9: # %bb3
; CHECK-NEXT: .LBB3_15:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: .LBB3_10: # %bb2.i3
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.11: # %bb2.i3
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
; CHECK-NEXT: .LBB3_13: # %bb2.i.i2
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.14: # %bb2.i.i2
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: jmp .LBB3_9
entry:
  %tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
  switch i8 %tmp4, label %bb3 [
    i8 18, label %bb
  ]

bb: ; preds = %entry
  switch i32 undef, label %bb1 [
    i32 0, label %bb2.i
    i32 37, label %bb.i
  ]

bb.i: ; preds = %bb
  switch i32 undef, label %bb1 [
    i32 0, label %lvalue_p.exit
  ]

bb2.i: ; preds = %bb
  br label %bb3

lvalue_p.exit: ; preds = %bb.i
  %tmp21 = load %union.tree_node*, %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
  %tmp22 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
  %tmp23 = load i8, i8* %tmp22, align 8 ; <i8> [#uses=1]
  %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1]
  switch i32 %tmp24, label %lvalue_p.exit4 [
    i32 0, label %bb2.i3
    i32 2, label %bb.i1
  ]

bb.i1: ; preds = %lvalue_p.exit
  %tmp25 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
  %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp27 = load %union.tree_node*, %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
  %tmp28 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp29 = load i8, i8* %tmp28, align 8 ; <i8> [#uses=1]
  %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1]
  switch i32 %tmp30, label %lvalue_p.exit4 [
    i32 0, label %bb2.i.i2
    i32 2, label %bb.i.i
  ]

bb.i.i: ; preds = %bb.i1
  %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
  %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1]
  br label %lvalue_p.exit4

; bb2.i.i2 and bb2.i3 run the same load/load/switch sequence on different
; base pointers (%tmp27 vs. %tmp21).
bb2.i.i2: ; preds = %bb.i1
  %tmp35 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp37 = load %union.tree_node*, %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp38 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp39 = load i8, i8* %tmp38, align 8 ; <i8> [#uses=1]
  switch i8 %tmp39, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
  ]

bb2.i3: ; preds = %lvalue_p.exit
  %tmp40 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp42 = load %union.tree_node*, %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp43 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp44 = load i8, i8* %tmp43, align 8 ; <i8> [#uses=1]
  switch i8 %tmp44, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
  ]

lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
  %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
  %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1]
  %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1]
  br i1 %or.cond, label %bb2, label %bb3

bb1: ; preds = %bb.i, %bb
  %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1]
  br i1 %.old, label %bb2, label %bb3

bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
  br label %bb3

bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
  %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
  unreachable
}

declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly

declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
|
|
|
|
|
|
; If one tail merging candidate falls through into the other,
; tail merging is likely profitable regardless of how few
; instructions are involved. This function should have only
; one ret instruction.

define void @foo(i1* %V) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: je .LBB4_2
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB4_2: # %return
; CHECK-NEXT: retq
entry:
  %t0 = icmp eq i1* %V, null
  br i1 %t0, label %return, label %bb

; Both %bb and %return end in "ret void"; the CHECK lines contain one retq.
bb:
  call void @func()
  ret void

return:
  ret void
}

declare void @func()
|
|
|
|
; one - One instruction may be tail-duplicated even with optsize.
;
; %bb7 and %bb12 are identical (a tail call followed by ret); the CHECK lines
; contain two separate jumps to tail_call_me.

@XYZ = external global i32

declare void @tail_call_me()

define void @one(i32 %v) nounwind optsize {
; CHECK-LABEL: one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB5_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB5_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %v, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %v, label %bb7 [
    i32 16, label %return
  ]

bb7:
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %v, label %bb12 [
    i32 128, label %return
  ]

bb12:
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; one_pgso - Same body as one, but without the optsize attribute; instead the
; function carries !prof !14 (function_entry_count of 0). The CHECK lines have
; the same shape as those of one.
define void @one_pgso(i32 %v) nounwind !prof !14 {
; CHECK-LABEL: one_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB6_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB6_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %v, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %v, label %bb7 [
    i32 16, label %return
  ]

bb7:
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %v, label %bb12 [
    i32 128, label %return
  ]

bb12:
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; two - Same as one, but with two instructions in the common
; tail instead of one. This is too much to be merged, given
; the optsize attribute.
;
; %bb7 and %bb12 each hold the same two volatile stores to @XYZ; the CHECK
; lines contain that pair of stores once, under %bb7.

define void @two() nounwind optsize {
; CHECK-LABEL: two:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB7_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB7_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_pgso - Same body as two, but without the optsize attribute; instead the
; function carries !prof !14 (function_entry_count of 0). The CHECK lines have
; the same shape as those of two.
define void @two_pgso() nounwind !prof !14 {
; CHECK-LABEL: two_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB8_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB8_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_minsize - Same as two, but with minsize instead of optsize.
;
; The CHECK lines have the same shape as those of two: the pair of volatile
; stores appears once, under %bb7.

define void @two_minsize() nounwind minsize {
; CHECK-LABEL: two_minsize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB9_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB9_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 undef, label %bb7 [
    i32 16, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

bbx:
  switch i32 undef, label %bb12 [
    i32 128, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  unreachable

return:
  ret void
}
|
|
|
|
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are enough to be tail-duplicated.
;
; Here the common tail is a volatile store to @XYZ followed by a tail call;
; the CHECK lines contain that store + jmp pair twice (under %bb7 and %bb12).

define void @two_nosize(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: two_nosize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB10_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB10_3: # %bbx
; CHECK-NEXT: cmpl $-1, %edx
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.5: # %bb12
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB10_4: # %return
; CHECK-NEXT: retq
entry:
  %0 = icmp eq i32 %x, 0
  br i1 %0, label %bbx, label %bby

bby:
  switch i32 %y, label %bb7 [
    i32 0, label %return
  ]

bb7:
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
  ret void

bbx:
  switch i32 %z, label %bb12 [
    i32 -1, label %return
  ]

bb12:
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
  ret void

return:
  ret void
}
|
|
|
|
; Tail-merging should merge the two ret instructions since one side
; can fall-through into the ret and the other side has to branch anyway.
;
; The CHECK lines contain a single retq, reached by fall-through from
; %bb.nph and by the jle from %entry.

define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
; CHECK-LABEL: TESTE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cmovgq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: jle .LBB11_2
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: imulq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .LBB11_2: # %for.end
; CHECK-NEXT: retq
entry:
  %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
  %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1]
  %cmp410 = icmp slt i64 %paraml, 1 ; <i1> [#uses=1]
  br i1 %cmp410, label %for.end, label %bb.nph

bb.nph: ; preds = %entry
  %tmp15 = mul i64 %paraml, %parami ; <i64> [#uses=1]
  ret i64 %tmp15

for.end: ; preds = %entry
  ret i64 %varx.0
}
|
|
|
|
; We should tail merge small blocks that don't end in a tail call or return
; instruction. Those blocks are typically unreachable and will be placed
; out-of-line after the main return, so we should try to eliminate as many of
; them as possible.
;
; abort1..abort4 are identical (call @abort, then unreachable); the CHECK
; lines contain a single "callq abort" that all four branches target.

declare void @abort()
define void @merge_aborts() {
; CHECK-LABEL: merge_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB12_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
entry:
  %c1 = call i1 @qux()
  br i1 %c1, label %cont1, label %abort1
abort1:
  call void @abort()
  unreachable
cont1:
  %c2 = call i1 @qux()
  br i1 %c2, label %cont2, label %abort2
abort2:
  call void @abort()
  unreachable
cont2:
  %c3 = call i1 @qux()
  br i1 %c3, label %cont3, label %abort3
abort3:
  call void @abort()
  unreachable
cont3:
  %c4 = call i1 @qux()
  br i1 %c4, label %cont4, label %abort4
abort4:
  call void @abort()
  unreachable
cont4:
  ret void
}
|
|
|
|
; Use alternating abort functions so that the blocks we wish to merge are not
; layout successors during branch folding.
;
; abort1/abort3 call @abort and abort2/abort4 call @alt_abort; the CHECK lines
; contain one "callq abort" and one "callq alt_abort".

declare void @alt_abort()

define void @merge_alternating_aborts() {
; CHECK-LABEL: merge_alternating_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB13_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
; CHECK-NEXT: .LBB13_6: # %abort2
; CHECK-NEXT: callq alt_abort
entry:
  %c1 = call i1 @qux()
  br i1 %c1, label %cont1, label %abort1
abort1:
  call void @abort()
  unreachable
cont1:
  %c2 = call i1 @qux()
  br i1 %c2, label %cont2, label %abort2
abort2:
  call void @alt_abort()
  unreachable
cont2:
  %c3 = call i1 @qux()
  br i1 %c3, label %cont3, label %abort3
abort3:
  call void @abort()
  unreachable
cont3:
  %c4 = call i1 @qux()
  br i1 %c4, label %cont4, label %abort4
abort4:
  call void @alt_abort()
  unreachable
cont4:
  ret void
}
|
|
|
|
; This triggers a situation where a new block (bb4 is split) is created and then
; would be passed to the PGSO interface llvm::shouldOptimizeForSize().
;
; The function itself has no !prof attachment; the module-level
; "ProfileSummary" flag is what provides profile data here.
@GV = global i32 0
define void @bfi_new_block_pgso(i32 %c) nounwind {
; CHECK-LABEL: bfi_new_block_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB14_4
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB14_6
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: cmpl $17, %edi
; CHECK-NEXT: je .LBB14_7
; CHECK-NEXT: # %bb.3: # %bb4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB14_4: # %bb5
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne .LBB14_8
; CHECK-NEXT: # %bb.5: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB14_6: # %bb3
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: .LBB14_7: # %bb4
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB14_8: # %bb6
; CHECK-NEXT: jmp tail_call_me # TAILCALL
entry:
  %0 = icmp eq i32 %c, 0
  br i1 %0, label %bb5, label %bb1

bb1:
  switch i32 %c, label %bb4 [
    i32 16, label %bb3
    i32 17, label %bb2
  ]

; bb2 and bb3 both end with a call to @func and a branch to bb4.
bb2:
  call void @func()
  br label %bb4

bb3:
  store i32 0, i32* @GV
  call void @func()
  br label %bb4

bb4:
  tail call void @tail_call_me()
  br label %return

bb5:
  switch i32 %c, label %bb6 [
    i32 128, label %return
  ]

bb6:
  tail call void @tail_call_me()
  br label %return

return:
  ret void
}
|
|
|
|
; Module-level profile summary (InstrProf format) attached through the
; "ProfileSummary" module flag.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of zero, referenced as !prof !14 by one_pgso and two_pgso.
!14 = !{!"function_entry_count", i64 0}
|