1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PGO][PGSO] Enable size optimizations in code gen / target passes for cold code.

Summary: Split off of D67120.

Reviewers: davidxl

Subscribers: hiraditya, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71288
This commit is contained in:
Hiroshi Yamauchi 2019-11-07 08:52:05 -08:00
parent c2d94cd107
commit 88791ed5ec
36 changed files with 4253 additions and 251 deletions

View File

@ -29,7 +29,7 @@ cl::opt<bool> PGSOColdCodeOnly(
"to cold code."));
// Hidden command-line flag that, per its description string, restricts the
// profile guided size optimizations to the IR passes or tests.
// The first literal of the description now ends with a space: adjacent string
// literals are concatenated verbatim, so without it the help text would read
// "...onlyto the IR passes or tests.".
cl::opt<bool> PGSOIRPassOrTestOnly(
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(true),
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to the IR passes or tests."));

View File

@ -0,0 +1,128 @@
; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \
; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s
; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
; fct1: zero-fills exactly 256 bytes through the llvm.memset intrinsic. The
; function carries !prof !14, which is defined with the module metadata at the
; end of this file.
; NOTE(review): the RUN line visible in this file enables only the plain and
; Linux check prefixes, so the Darwin-prefixed expectations look inactive
; here -- confirm.
; CHECK-LABEL: fct1:
; For small size (<= 256), we do not change memset to bzero.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct1(i8* nocapture %ptr) !prof !14 {
entry:
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
ret void
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
; fct2: same as fct1 but with a constant size of 257 bytes, i.e. one byte
; past the 256-byte boundary mentioned in the comments. The Linux expectation
; is still a branch to memset.
; CHECK-LABEL: fct2:
; When the size is bigger than 256, change into bzero.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct2(i8* nocapture %ptr) !prof !14 {
entry:
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
ret void
}
; fct3: zero-fill whose length is not a compile-time constant; the i32
; argument %unknown is sign-extended to i64 and passed as the memset size.
; CHECK-LABEL: fct3:
; For unknown size, change to bzero.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct3(i8* nocapture %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
ret void
}
; fct4: zero-fill of 256 bytes expressed as a call to __memset_chk, with the
; destination size operand taken from llvm.objectsize on %ptr. The expected
; call target is memset.
; CHECK-LABEL: fct4:
; Size <= 256, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct4(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
ret void
}
declare i8* @__memset_chk(i8*, i32, i64, i64)
declare i64 @llvm.objectsize.i64(i8*, i1)
; fct5: __memset_chk variant of fct2 -- fill value 0, constant size 257, with
; the destination size operand taken from llvm.objectsize on %ptr.
; CHECK-LABEL: fct5:
; Size > 256, change.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct5(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
ret void
}
; fct6: __memset_chk variant of fct3 -- fill value 0, with the length coming
; from the sign-extended i32 argument %unknown rather than a constant.
; CHECK-LABEL: fct6:
; Size = unknown, change.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct6(i8* %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
ret void
}
; Next functions check that memset is not turned into bzero
; when the set constant is non-zero, whatever the given size.
; fct7: __memset_chk call with fill value 1 and constant size 256; both the
; Darwin and Linux expectations name memset.
; CHECK-LABEL: fct7:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct7(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
ret void
}
; fct8: __memset_chk call with fill value 1 and constant size 257, i.e. the
; non-zero-fill counterpart of fct5.
; CHECK-LABEL: fct8:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct8(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
ret void
}
; fct9: __memset_chk call with fill value 1 and a length taken from the
; sign-extended i32 argument %unknown, i.e. the non-zero-fill counterpart of
; fct6.
; CHECK-LABEL: fct9:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct9(i8* %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
ret void
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; the functions in this file
; reference it through !prof.
; NOTE(review): presumably the zero entry count is what makes those functions
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -203,3 +203,136 @@ default: unreachable
return: ret void
}
; jt1_optsize: a switch over %a with 17 consecutive cases (1 through 17) in a
; function marked optsize. Each case block calls @ext with a distinct pair of
; constants and then joins %return, which returns %b unchanged.
; Every check prefix listed below expects one jump table holding all 17
; targets and no second jump table.
define i32 @jt1_optsize(i32 %a, i32 %b) optsize {
entry:
switch i32 %a, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
i32 10, label %bb10
i32 11, label %bb11
i32 12, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 15, label %bb15
i32 16, label %bb16
i32 17, label %bb17
]
; CHECK-LABEL: function jt1_optsize:
; CHECK-NEXT: Jump Tables:
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK0-NOT: %jump-table.1:
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK4-NOT: %jump-table.1:
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK8-NOT: %jump-table.1:
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK16-NOT: %jump-table.1:
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM1-NOT: %jump-table.1:
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM3-NOT: %jump-table.1:
; CHECK-DAG: End machine code for function jt1_optsize.
bb1: tail call void @ext(i32 1, i32 0) br label %return
bb2: tail call void @ext(i32 2, i32 2) br label %return
bb3: tail call void @ext(i32 3, i32 4) br label %return
bb4: tail call void @ext(i32 4, i32 6) br label %return
bb5: tail call void @ext(i32 5, i32 8) br label %return
bb6: tail call void @ext(i32 6, i32 10) br label %return
bb7: tail call void @ext(i32 7, i32 12) br label %return
bb8: tail call void @ext(i32 8, i32 14) br label %return
bb9: tail call void @ext(i32 9, i32 16) br label %return
bb10: tail call void @ext(i32 1, i32 18) br label %return
bb11: tail call void @ext(i32 2, i32 20) br label %return
bb12: tail call void @ext(i32 3, i32 22) br label %return
bb13: tail call void @ext(i32 4, i32 24) br label %return
bb14: tail call void @ext(i32 5, i32 26) br label %return
bb15: tail call void @ext(i32 6, i32 28) br label %return
bb16: tail call void @ext(i32 7, i32 30) br label %return
bb17: tail call void @ext(i32 8, i32 32) br label %return
return: ret i32 %b
}
; jt1_pgso: same 17-case switch as jt1_optsize, but the function carries
; !prof !14 (a function_entry_count of 0, defined with the module metadata)
; instead of the optsize attribute.
; Every check prefix listed below expects the same result as for the optsize
; variant: one jump table holding all 17 targets and no second jump table.
define i32 @jt1_pgso(i32 %a, i32 %b) !prof !14 {
entry:
switch i32 %a, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
i32 10, label %bb10
i32 11, label %bb11
i32 12, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 15, label %bb15
i32 16, label %bb16
i32 17, label %bb17
]
; CHECK-LABEL: function jt1_pgso:
; CHECK-NEXT: Jump Tables:
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK0-NOT: %jump-table.1:
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK4-NOT: %jump-table.1:
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK8-NOT: %jump-table.1:
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK16-NOT: %jump-table.1:
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM1-NOT: %jump-table.1:
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM3-NOT: %jump-table.1:
; CHECK-DAG: End machine code for function jt1_pgso.
bb1: tail call void @ext(i32 1, i32 0) br label %return
bb2: tail call void @ext(i32 2, i32 2) br label %return
bb3: tail call void @ext(i32 3, i32 4) br label %return
bb4: tail call void @ext(i32 4, i32 6) br label %return
bb5: tail call void @ext(i32 5, i32 8) br label %return
bb6: tail call void @ext(i32 6, i32 10) br label %return
bb7: tail call void @ext(i32 7, i32 12) br label %return
bb8: tail call void @ext(i32 8, i32 14) br label %return
bb9: tail call void @ext(i32 9, i32 16) br label %return
bb10: tail call void @ext(i32 1, i32 18) br label %return
bb11: tail call void @ext(i32 2, i32 20) br label %return
bb12: tail call void @ext(i32 3, i32 22) br label %return
bb13: tail call void @ext(i32 4, i32 24) br label %return
bb14: tail call void @ext(i32 5, i32 26) br label %return
bb15: tail call void @ext(i32 6, i32 28) br label %return
bb16: tail call void @ext(i32 7, i32 30) br label %return
bb17: tail call void @ext(i32 8, i32 32) br label %return
return: ret i32 %b
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; jt1_pgso references it
; through !prof.
; NOTE(review): presumably the zero entry count is what makes that function
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -17,3 +17,28 @@ define void @f_optsize(<4 x i32>* %p) optsize {
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
ret void
}
; f_pgso: stores the constant vector <-1, 0, 0, -1> to %p with 4-byte
; alignment. The function carries !prof !14 rather than a size attribute.
; The expectations are a vld1.64 load through r1 followed by a .p2align 3
; directive.
; CHECK-LABEL: f_pgso:
; CHECK: vld1.64 {{.*}}, [r1]
; CHECK: .p2align 3
define void @f_pgso(<4 x i32>* %p) !prof !14 {
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
ret void
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; f_pgso references it
; through !prof.
; NOTE(review): presumably the zero entry count is what makes that function
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -23,6 +23,17 @@ entry:
ret void
}
; Perform tail call optimization for external symbol.
; caller_extern_pgso: copies 7 bytes from %src into the global @dest_pgso via
; the llvm.memcpy intrinsic, in a function carrying !prof !14. The
; expectations require "tail memcpy" and forbid "call memcpy".
; NOTE(review): the copy length (7) is larger than the declared size of
; @dest_pgso ([2 x i8]) -- confirm this is intentional for the test.
@dest_pgso = global [2 x i8] zeroinitializer
define void @caller_extern_pgso(i8* %src) !prof !14 {
entry:
; CHECK: caller_extern_pgso
; CHECK-NOT: call memcpy
; CHECK: tail memcpy
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest_pgso, i32 0, i32 0), i8* %src, i32 7, i1 false)
ret void
}
; Perform indirect tail call optimization (for function pointer call).
declare void @callee_indirect1()
declare void @callee_indirect2()
@ -146,3 +157,20 @@ entry:
tail call void @callee_nostruct()
ret void
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; caller_extern_pgso
; references it through !prof.
; NOTE(review): presumably the zero entry count is what makes that function
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -29,6 +29,13 @@ define i32 @test_minsize(i32 %a) nounwind minsize {
ret i32 %a
}
; test_pgso: returns its i32 argument unchanged, in a nounwind function that
; carries !prof !14. The expectations are a movl followed immediately by ret.
define i32 @test_pgso(i32 %a) nounwind !prof !14 {
; CHECK: test_pgso
; CHECK: movl
; CHECK-NEXT: ret
ret i32 %a
}
define i32 @test_add(i32 %a, i32 %b) nounwind {
; CHECK: test_add
; CHECK: addl
@ -101,3 +108,19 @@ while.end:
ret void
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; test_pgso references it
; through !prof.
; NOTE(review): presumably the zero entry count is what makes that function
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -190,6 +190,16 @@ define float @floor_f32_load(float* %aptr) optsize {
ret float %res
}
; floor_f32_load_pgso: loads a float from %aptr and applies llvm.floor.f32,
; in a function carrying !prof !14. The expected code is a single
; vroundss $9 that reads its source straight from (%rdi), followed by retq.
define float @floor_f32_load_pgso(float* %aptr) !prof !14 {
; CHECK-LABEL: floor_f32_load_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load float, float* %aptr
%res = call float @llvm.floor.f32(float %a)
ret float %res
}
define double @nearbyint_f64_load(double* %aptr) optsize {
; CHECK-LABEL: nearbyint_f64_load:
; CHECK: # %bb.0:
@ -200,3 +210,29 @@ define double @nearbyint_f64_load(double* %aptr) optsize {
ret double %res
}
; nearbyint_f64_load_pgso: loads a double from %aptr and applies
; llvm.nearbyint.f64, in a function carrying !prof !14. The expected code is
; a single vroundsd $12 that reads its source straight from (%rdi), followed
; by retq.
define double @nearbyint_f64_load_pgso(double* %aptr) !prof !14 {
; CHECK-LABEL: nearbyint_f64_load_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load double, double* %aptr
%res = call double @llvm.nearbyint.f64(double %a)
ret double %res
}
; Module-level profile summary (ProfileFormat "InstrProf") attached as a
; module flag. !14 is a function_entry_count of 0; the *_pgso functions in
; this file reference it through !prof.
; NOTE(review): presumably the zero entry count is what makes those functions
; count as cold for profile guided size optimization -- confirm against
; ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -1970,6 +1970,47 @@ define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
ret <32 x i16> %ret
}
; test_build_vec_v32i1_pgso: selects between %x and zero per lane using a
; constant <32 x i1> mask, in a function carrying !prof !14.
; The SKX, AVX512BW and X86 expectations materialize the mask as the
; immediate 0x59455495 in a k-register and apply a zero-masked vmovdqu16.
; The KNL and AVX512DQ expectations instead split the vector into two ymm
; halves and AND each half with a rip-relative memory operand.
define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
; KNL-LABEL: test_build_vec_v32i1_pgso:
; KNL: ## %bb.0:
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_build_vec_v32i1_pgso:
; SKX: ## %bb.0:
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_build_vec_v32i1_pgso:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_build_vec_v32i1_pgso:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test_build_vec_v32i1_pgso:
; X86: ## %bb.0:
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
%ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
ret <32 x i16> %ret
}
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; KNL-LABEL: test_build_vec_v64i1:
; KNL: ## %bb.0:
@ -2013,12 +2054,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB43_2
; KNL-NEXT: je LBB44_2
; KNL-NEXT: ## %bb.1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB43_2: ## %L2
; KNL-NEXT: LBB44_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@ -2029,12 +2070,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0
; SKX-NEXT: ktestb %k0, %k1
; SKX-NEXT: je LBB43_2
; SKX-NEXT: je LBB44_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB43_2: ## %L2
; SKX-NEXT: LBB44_2: ## %L2
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@ -2046,12 +2087,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB43_2
; AVX512BW-NEXT: je LBB44_2
; AVX512BW-NEXT: ## %bb.1: ## %L1
; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB43_2: ## %L2
; AVX512BW-NEXT: LBB44_2: ## %L2
; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2062,12 +2103,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: ktestb %k0, %k1
; AVX512DQ-NEXT: je LBB43_2
; AVX512DQ-NEXT: je LBB44_2
; AVX512DQ-NEXT: ## %bb.1: ## %L1
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB43_2: ## %L2
; AVX512DQ-NEXT: LBB44_2: ## %L2
; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@ -2079,12 +2120,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0
; X86-NEXT: ktestb %k0, %k1
; X86-NEXT: je LBB43_2
; X86-NEXT: je LBB44_2
; X86-NEXT: ## %bb.1: ## %L1
; X86-NEXT: vmovapd %zmm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB43_2: ## %L2
; X86-NEXT: LBB44_2: ## %L2
; X86-NEXT: vmovapd %zmm0, 8(%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
@ -2131,13 +2172,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: je LBB44_2
; KNL-NEXT: je LBB45_2
; KNL-NEXT: ## %bb.1: ## %L1
; KNL-NEXT: vmovaps %zmm0, (%rdi)
; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB44_2: ## %L2
; KNL-NEXT: LBB45_2: ## %L2
; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
; KNL-NEXT: vzeroupper
@ -2154,13 +2195,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kortestd %k1, %k0
; SKX-NEXT: je LBB44_2
; SKX-NEXT: je LBB45_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB44_2: ## %L2
; SKX-NEXT: LBB45_2: ## %L2
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
; SKX-NEXT: vzeroupper
@ -2177,13 +2218,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
; AVX512BW-NEXT: kortestd %k1, %k0
; AVX512BW-NEXT: je LBB44_2
; AVX512BW-NEXT: je LBB45_2
; AVX512BW-NEXT: ## %bb.1: ## %L1
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB44_2: ## %L2
; AVX512BW-NEXT: LBB45_2: ## %L2
; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
; AVX512BW-NEXT: vzeroupper
@ -2203,13 +2244,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; AVX512DQ-NEXT: kmovw %k0, %ecx
; AVX512DQ-NEXT: shll $16, %ecx
; AVX512DQ-NEXT: orl %eax, %ecx
; AVX512DQ-NEXT: je LBB44_2
; AVX512DQ-NEXT: je LBB45_2
; AVX512DQ-NEXT: ## %bb.1: ## %L1
; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB44_2: ## %L2
; AVX512DQ-NEXT: LBB45_2: ## %L2
; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
; AVX512DQ-NEXT: vzeroupper
@ -2227,13 +2268,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
; X86-NEXT: kunpckwd %k1, %k2, %k1
; X86-NEXT: kortestd %k1, %k0
; X86-NEXT: je LBB44_2
; X86-NEXT: je LBB45_2
; X86-NEXT: ## %bb.1: ## %L1
; X86-NEXT: vmovaps %zmm0, (%eax)
; X86-NEXT: vmovaps %zmm1, 64(%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB44_2: ## %L2
; X86-NEXT: LBB45_2: ## %L2
; X86-NEXT: vmovaps %zmm0, 4(%eax)
; X86-NEXT: vmovaps %zmm1, 68(%eax)
; X86-NEXT: vzeroupper
@ -4188,12 +4229,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: jle LBB65_1
; KNL-NEXT: jle LBB66_1
; KNL-NEXT: ## %bb.2: ## %bb.2
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB65_1: ## %bb.1
; KNL-NEXT: LBB66_1: ## %bb.1
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -4207,12 +4248,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testw %ax, %ax
; SKX-NEXT: jle LBB65_1
; SKX-NEXT: jle LBB66_1
; SKX-NEXT: ## %bb.2: ## %bb.2
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB65_1: ## %bb.1
; SKX-NEXT: LBB66_1: ## %bb.1
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -4226,12 +4267,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testw %ax, %ax
; AVX512BW-NEXT: jle LBB65_1
; AVX512BW-NEXT: jle LBB66_1
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB65_1: ## %bb.1
; AVX512BW-NEXT: LBB66_1: ## %bb.1
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -4245,12 +4286,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: testw %ax, %ax
; AVX512DQ-NEXT: jle LBB65_1
; AVX512DQ-NEXT: jle LBB66_1
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB65_1: ## %bb.1
; AVX512DQ-NEXT: LBB66_1: ## %bb.1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -4264,12 +4305,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: testw %ax, %ax
; X86-NEXT: jle LBB65_1
; X86-NEXT: jle LBB66_1
; X86-NEXT: ## %bb.2: ## %bb.2
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB65_1: ## %bb.1
; X86-NEXT: LBB66_1: ## %bb.1
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -4297,11 +4338,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: jb LBB66_2
; CHECK-NEXT: jb LBB67_2
; CHECK-NEXT: ## %bb.1: ## %bb.1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _foo
; CHECK-NEXT: LBB66_2: ## %bb.2
; CHECK-NEXT: LBB67_2: ## %bb.2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -4313,11 +4354,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: kortestw %k0, %k0
; X86-NEXT: jb LBB66_2
; X86-NEXT: jb LBB67_2
; X86-NEXT: ## %bb.1: ## %bb.1
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: LBB66_2: ## %bb.2
; X86-NEXT: LBB67_2: ## %bb.2
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
@ -4505,12 +4546,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB72_1
; KNL-NEXT: je LBB73_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB72_1: ## %bar
; KNL-NEXT: LBB73_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -4527,12 +4568,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: ktestb %k1, %k0
; SKX-NEXT: je LBB72_1
; SKX-NEXT: je LBB73_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB72_1: ## %bar
; SKX-NEXT: LBB73_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -4555,12 +4596,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB72_1
; AVX512BW-NEXT: je LBB73_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB72_1: ## %bar
; AVX512BW-NEXT: LBB73_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -4581,12 +4622,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: korb %k3, %k2, %k1
; AVX512DQ-NEXT: ktestb %k1, %k0
; AVX512DQ-NEXT: je LBB72_1
; AVX512DQ-NEXT: je LBB73_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB72_1: ## %bar
; AVX512DQ-NEXT: LBB73_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -4603,12 +4644,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: ktestb %k1, %k0
; X86-NEXT: je LBB72_1
; X86-NEXT: je LBB73_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB72_1: ## %bar
; X86-NEXT: LBB73_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -4646,12 +4687,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB73_1
; KNL-NEXT: je LBB74_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB73_1: ## %bar
; KNL-NEXT: LBB74_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -4668,12 +4709,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: ktestb %k1, %k0
; SKX-NEXT: je LBB73_1
; SKX-NEXT: je LBB74_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB73_1: ## %bar
; SKX-NEXT: LBB74_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -4692,12 +4733,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB73_1
; AVX512BW-NEXT: je LBB74_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB73_1: ## %bar
; AVX512BW-NEXT: LBB74_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -4714,12 +4755,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
; AVX512DQ-NEXT: korb %k2, %k1, %k1
; AVX512DQ-NEXT: ktestb %k1, %k0
; AVX512DQ-NEXT: je LBB73_1
; AVX512DQ-NEXT: je LBB74_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB73_1: ## %bar
; AVX512DQ-NEXT: LBB74_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -4736,12 +4777,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: ktestb %k1, %k0
; X86-NEXT: je LBB73_1
; X86-NEXT: je LBB74_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB73_1: ## %bar
; X86-NEXT: LBB74_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -4778,12 +4819,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: je LBB74_1
; KNL-NEXT: je LBB75_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB74_1: ## %bar
; KNL-NEXT: LBB75_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -4800,12 +4841,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
; SKX-NEXT: korw %k2, %k1, %k1
; SKX-NEXT: ktestw %k1, %k0
; SKX-NEXT: je LBB74_1
; SKX-NEXT: je LBB75_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB74_1: ## %bar
; SKX-NEXT: LBB75_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -4823,12 +4864,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; AVX512BW-NEXT: korw %k2, %k1, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: je LBB74_1
; AVX512BW-NEXT: je LBB75_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB74_1: ## %bar
; AVX512BW-NEXT: LBB75_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -4845,12 +4886,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
; AVX512DQ-NEXT: korw %k2, %k1, %k1
; AVX512DQ-NEXT: ktestw %k1, %k0
; AVX512DQ-NEXT: je LBB74_1
; AVX512DQ-NEXT: je LBB75_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB74_1: ## %bar
; AVX512DQ-NEXT: LBB75_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -4867,12 +4908,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
; X86-NEXT: korw %k2, %k1, %k1
; X86-NEXT: ktestw %k1, %k0
; X86-NEXT: je LBB74_1
; X86-NEXT: je LBB75_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB74_1: ## %bar
; X86-NEXT: LBB75_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -4928,12 +4969,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: je LBB75_1
; KNL-NEXT: je LBB76_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB75_1: ## %bar
; KNL-NEXT: LBB76_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -4950,12 +4991,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: ktestd %k1, %k0
; SKX-NEXT: je LBB75_1
; SKX-NEXT: je LBB76_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB75_1: ## %bar
; SKX-NEXT: LBB76_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -4972,12 +5013,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
; AVX512BW-NEXT: kord %k2, %k1, %k1
; AVX512BW-NEXT: ktestd %k1, %k0
; AVX512BW-NEXT: je LBB75_1
; AVX512BW-NEXT: je LBB76_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB75_1: ## %bar
; AVX512BW-NEXT: LBB76_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -5014,12 +5055,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; AVX512DQ-NEXT: kmovw %k0, %ecx
; AVX512DQ-NEXT: shll $16, %ecx
; AVX512DQ-NEXT: orl %eax, %ecx
; AVX512DQ-NEXT: je LBB75_1
; AVX512DQ-NEXT: je LBB76_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB75_1: ## %bar
; AVX512DQ-NEXT: LBB76_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -5036,12 +5077,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
; X86-NEXT: kord %k2, %k1, %k1
; X86-NEXT: ktestd %k1, %k0
; X86-NEXT: je LBB75_1
; X86-NEXT: je LBB76_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB75_1: ## %bar
; X86-NEXT: LBB76_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -5121,12 +5162,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; KNL-NEXT: orl %eax, %edx
; KNL-NEXT: shlq $32, %rdx
; KNL-NEXT: orq %rcx, %rdx
; KNL-NEXT: je LBB76_1
; KNL-NEXT: je LBB77_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB76_1: ## %bar
; KNL-NEXT: LBB77_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
@ -5143,12 +5184,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
; SKX-NEXT: korq %k2, %k1, %k1
; SKX-NEXT: ktestq %k1, %k0
; SKX-NEXT: je LBB76_1
; SKX-NEXT: je LBB77_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB76_1: ## %bar
; SKX-NEXT: LBB77_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
@ -5165,12 +5206,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
; AVX512BW-NEXT: korq %k2, %k1, %k1
; AVX512BW-NEXT: ktestq %k1, %k0
; AVX512BW-NEXT: je LBB76_1
; AVX512BW-NEXT: je LBB77_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB76_1: ## %bar
; AVX512BW-NEXT: LBB77_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
@ -5231,12 +5272,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; AVX512DQ-NEXT: orl %eax, %edx
; AVX512DQ-NEXT: shlq $32, %rdx
; AVX512DQ-NEXT: orq %rcx, %rdx
; AVX512DQ-NEXT: je LBB76_1
; AVX512DQ-NEXT: je LBB77_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB76_1: ## %bar
; AVX512DQ-NEXT: LBB77_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
@ -5255,12 +5296,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; X86-NEXT: kandq %k1, %k0, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kortestd %k1, %k0
; X86-NEXT: je LBB76_1
; X86-NEXT: je LBB77_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB76_1: ## %bar
; X86-NEXT: LBB77_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
@ -5360,3 +5401,20 @@ define <64 x i1> @mask64_insert(i32 %a) {
%maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
ret <64 x i1> %maskv
}
; Synthetic profile data for this test module. !0 installs a "ProfileSummary"
; module flag in InstrProf format (!1..!13 are its fields and detailed-summary
; entries). !14 is a function_entry_count of 0, meant to be attached to a
; function through a !prof attachment.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -130,6 +130,24 @@ define i64 @div64_optsize(i64 %a, i64 %b) optsize {
ret i64 %div
}
; Signed 64-bit division in a function whose !prof attachment (!15) is a
; function_entry_count of 0. Both run configurations shown here expect a single
; idivq with no branch to a narrower 32-bit divide.
define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
; CHECK-LABEL: div64_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: cqto
; CHECK-NEXT: idivq %rsi
; CHECK-NEXT: retq
;
; HUGEWS-LABEL: div64_pgso:
; HUGEWS: # %bb.0:
; HUGEWS-NEXT: movq %rdi, %rax
; HUGEWS-NEXT: cqto
; HUGEWS-NEXT: idivq %rsi
; HUGEWS-NEXT: retq
%div = sdiv i64 %a, %b
ret i64 %div
}
define i64 @div64_hugews(i64 %a, i64 %b) {
; ATOM-LABEL: div64_hugews:
; ATOM: # %bb.0:
@ -137,12 +155,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
; ATOM-NEXT: movq %rdi, %rax
; ATOM-NEXT: orq %rsi, %rcx
; ATOM-NEXT: shrq $32, %rcx
; ATOM-NEXT: je .LBB3_1
; ATOM-NEXT: je .LBB4_1
; ATOM-NEXT: # %bb.2:
; ATOM-NEXT: cqto
; ATOM-NEXT: idivq %rsi
; ATOM-NEXT: retq
; ATOM-NEXT: .LBB3_1:
; ATOM-NEXT: .LBB4_1:
; ATOM-NEXT: # kill: def $eax killed $eax killed $rax
; ATOM-NEXT: xorl %edx, %edx
; ATOM-NEXT: divl %esi
@ -155,12 +173,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
; SLM-NEXT: movq %rdi, %rax
; SLM-NEXT: orq %rsi, %rcx
; SLM-NEXT: shrq $32, %rcx
; SLM-NEXT: je .LBB3_1
; SLM-NEXT: je .LBB4_1
; SLM-NEXT: # %bb.2:
; SLM-NEXT: cqto
; SLM-NEXT: idivq %rsi
; SLM-NEXT: retq
; SLM-NEXT: .LBB3_1:
; SLM-NEXT: .LBB4_1:
; SLM-NEXT: xorl %edx, %edx
; SLM-NEXT: # kill: def $eax killed $eax killed $rax
; SLM-NEXT: divl %esi
@ -173,12 +191,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
; SKL-NEXT: movq %rdi, %rcx
; SKL-NEXT: orq %rsi, %rcx
; SKL-NEXT: shrq $32, %rcx
; SKL-NEXT: je .LBB3_1
; SKL-NEXT: je .LBB4_1
; SKL-NEXT: # %bb.2:
; SKL-NEXT: cqto
; SKL-NEXT: idivq %rsi
; SKL-NEXT: retq
; SKL-NEXT: .LBB3_1:
; SKL-NEXT: .LBB4_1:
; SKL-NEXT: # kill: def $eax killed $eax killed $rax
; SKL-NEXT: xorl %edx, %edx
; SKL-NEXT: divl %esi
@ -213,6 +231,24 @@ define i32 @div32_optsize(i32 %a, i32 %b) optsize {
ret i32 %div
}
; Signed 32-bit division in a function whose !prof attachment (!15) is a
; function_entry_count of 0. Both run configurations shown here expect a plain
; cltd + idivl sequence with no extra branch.
define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
; CHECK-LABEL: div32_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %esi
; CHECK-NEXT: retq
;
; HUGEWS-LABEL: div32_pgso:
; HUGEWS: # %bb.0:
; HUGEWS-NEXT: movl %edi, %eax
; HUGEWS-NEXT: cltd
; HUGEWS-NEXT: idivl %esi
; HUGEWS-NEXT: retq
%div = sdiv i32 %a, %b
ret i32 %div
}
define i32 @div32_minsize(i32 %a, i32 %b) minsize {
; CHECK-LABEL: div32_minsize:
; CHECK: # %bb.0:
@ -246,3 +282,4 @@ define i32 @div32_minsize(i32 %a, i32 %b) minsize {
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 3}
!14 = !{i32 999999, i64 5, i32 3}
!15 = !{!"function_entry_count", i64 0}

View File

@ -88,7 +88,7 @@ define i32 @weighted_select1(i32 %a, i32 %b) {
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
ret i32 %sel
}
@ -104,7 +104,7 @@ define i32 @weighted_select2(i32 %a, i32 %b) {
; CHECK-NEXT: .LBB6_2: # %select.end
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !1
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
ret i32 %sel
}
@ -124,7 +124,7 @@ define i32 @weighted_select3(i32 %a, i32 %b) {
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
ret i32 %sel
}
@ -137,12 +137,51 @@ define i32 @unweighted_select(i32 %a, i32 %b) {
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !3
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !18
ret i32 %sel
}
!0 = !{!"branch_weights", i32 1, i32 99}
!1 = !{!"branch_weights", i32 1, i32 100}
!2 = !{!"branch_weights", i32 100, i32 1}
!3 = !{!"branch_weights", i32 0, i32 0}
; Select with 1:100 branch weights (!16) inside an optsize function. The
; expected code keeps the select as a cmovnel instead of expanding it into a
; compare-and-branch.
define i32 @weighted_select_optsize(i32 %a, i32 %b) optsize {
; CHECK-LABEL: weighted_select_optsize:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
ret i32 %sel
}
; Select with 1:100 branch weights (!16) inside a function whose entry count is
; 0 (!14). The expected code is the same cmovnel sequence as the optsize
; variant: the select is not turned into a branch.
define i32 @weighted_select_pgso(i32 %a, i32 %b) !prof !14 {
; CHECK-LABEL: weighted_select_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: retq
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
ret i32 %sel
}
; Module metadata for the select tests.
; !0..!13: a "ProfileSummary" module flag in InstrProf format.
; !14: function_entry_count of 0, attached to @weighted_select_pgso.
; !15..!18: the branch_weights nodes referenced by the select instructions
; (1:99, 1:100, 100:1 and 0:0 respectively).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
!15 = !{!"branch_weights", i32 1, i32 99}
!16 = !{!"branch_weights", i32 1, i32 100}
!17 = !{!"branch_weights", i32 100, i32 1}
!18 = !{!"branch_weights", i32 0, i32 0}

View File

@ -0,0 +1,242 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
; RUN: llc < %s -mtriple=x86_64-win32 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=WIN64
declare void @foo()
declare void @bar()
; Two-way branch whose successors each tail-call a different function, in a
; function with an entry count of 0 (!14). On all three triples the expected
; code is a conditional tail call (jne bar) followed by jmp foo, both using
; 1-byte PC-relative encodings.
define void @f(i32 %x, i32 %y) !prof !14 {
; CHECK32-LABEL: f:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x08]
; CHECK32-NEXT: jne bar # TAILCALL
; CHECK32-NEXT: # encoding: [0x75,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; CHECK32-NEXT: # %bb.1: # %bb1
; CHECK32-NEXT: jmp foo # TAILCALL
; CHECK32-NEXT: # encoding: [0xeb,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
;
; CHECK64-LABEL: f:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
; CHECK64-NEXT: jne bar # TAILCALL
; CHECK64-NEXT: # encoding: [0x75,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; CHECK64-NEXT: # %bb.1: # %bb1
; CHECK64-NEXT: jmp foo # TAILCALL
; CHECK64-NEXT: # encoding: [0xeb,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
;
; WIN64-LABEL: f:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
; WIN64-NEXT: jne bar # TAILCALL
; WIN64-NEXT: # encoding: [0x75,A]
; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; WIN64-NEXT: # %bb.1: # %bb1
; WIN64-NEXT: jmp foo # TAILCALL
; WIN64-NEXT: # encoding: [0xeb,A]
; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
entry:
%p = icmp eq i32 %x, %y
br i1 %p, label %bb1, label %bb2
bb1:
tail call void @foo()
ret void
bb2:
tail call void @bar()
ret void
; Check that the asm doesn't just look good, but uses the correct encoding.
}
; Same branch-to-tail-calls shape as @f, but an empty inline asm clobbers ebx
; so a callee-saved register has to be pushed and popped. The expected code
; does not use a conditional tail call here: it branches to a local label
; (.LBB1_2) and each path pops the register before its own jmp.
define void @f_non_leaf(i32 %x, i32 %y) !prof !14 {
; CHECK32-LABEL: f_non_leaf:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
; CHECK32-NEXT: .cfi_def_cfa_offset 8
; CHECK32-NEXT: .cfi_offset %ebx, -8
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; CHECK32-NEXT: #APP
; CHECK32-NEXT: #NO_APP
; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c]
; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
; CHECK32-NEXT: # %bb.1: # %bb1
; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
; CHECK32-NEXT: .cfi_def_cfa_offset 4
; CHECK32-NEXT: jmp foo # TAILCALL
; CHECK32-NEXT: # encoding: [0xeb,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
; CHECK32-NEXT: .LBB1_2: # %bb2
; CHECK32-NEXT: .cfi_def_cfa_offset 8
; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
; CHECK32-NEXT: .cfi_def_cfa_offset 4
; CHECK32-NEXT: jmp bar # TAILCALL
; CHECK32-NEXT: # encoding: [0xeb,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
;
; CHECK64-LABEL: f_non_leaf:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: pushq %rbx # encoding: [0x53]
; CHECK64-NEXT: .cfi_def_cfa_offset 16
; CHECK64-NEXT: .cfi_offset %rbx, -16
; CHECK64-NEXT: #APP
; CHECK64-NEXT: #NO_APP
; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
; CHECK64-NEXT: # %bb.1: # %bb1
; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
; CHECK64-NEXT: .cfi_def_cfa_offset 8
; CHECK64-NEXT: jmp foo # TAILCALL
; CHECK64-NEXT: # encoding: [0xeb,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
; CHECK64-NEXT: .LBB1_2: # %bb2
; CHECK64-NEXT: .cfi_def_cfa_offset 16
; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
; CHECK64-NEXT: .cfi_def_cfa_offset 8
; CHECK64-NEXT: jmp bar # TAILCALL
; CHECK64-NEXT: # encoding: [0xeb,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
;
; WIN64-LABEL: f_non_leaf:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rbx # encoding: [0x53]
; WIN64-NEXT: .seh_pushreg %rbx
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: #APP
; WIN64-NEXT: #NO_APP
; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
; WIN64-NEXT: # %bb.1: # %bb1
; WIN64-NEXT: popq %rbx # encoding: [0x5b]
; WIN64-NEXT: jmp foo # TAILCALL
; WIN64-NEXT: # encoding: [0xeb,A]
; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
; WIN64-NEXT: .LBB1_2: # %bb2
; WIN64-NEXT: nop # encoding: [0x90]
; WIN64-NEXT: popq %rbx # encoding: [0x5b]
; WIN64-NEXT: jmp bar # TAILCALL
; WIN64-NEXT: # encoding: [0xeb,A]
; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
entry:
; Force %ebx to be spilled to the stack so that this is not a
; "leaf" function on Win64.
tail call void asm sideeffect "", "~{ebx}"()
%p = icmp eq i32 %x, %y
br i1 %p, label %bb1, label %bb2
bb1:
tail call void @foo()
ret void
bb2:
tail call void @bar()
ret void
}
declare x86_thiscallcc zeroext i1 @baz(i8*, i32)
; Three-way control flow ending in a phi, where one path tail-calls @baz, in a
; function with an entry count of 0 (!14). The expected layout puts the
; conditional tail call (je baz) at the end of %land.rhs and lets it fall
; through into %land.end; the entry-block "false" path is placed last and jumps
; back to %land.end.
define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) !prof !14 {
; CHECK32-LABEL: BlockPlacementTest:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
; CHECK32-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
; CHECK32-NEXT: je .LBB2_3 # encoding: [0x74,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
; CHECK32-NEXT: # %bb.1: # %land.rhs
; CHECK32-NEXT: movb $1, %al # encoding: [0xb0,0x01]
; CHECK32-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
; CHECK32-NEXT: je baz # TAILCALL
; CHECK32-NEXT: # encoding: [0x74,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
; CHECK32-NEXT: .LBB2_2: # %land.end
; CHECK32-NEXT: # kill: def $al killed $al killed $eax
; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00]
; CHECK32-NEXT: .LBB2_3:
; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK32-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
;
; CHECK64-LABEL: BlockPlacementTest:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: testb $42, %sil # encoding: [0x40,0xf6,0xc6,0x2a]
; CHECK64-NEXT: je .LBB2_3 # encoding: [0x74,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
; CHECK64-NEXT: # %bb.1: # %land.rhs
; CHECK64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
; CHECK64-NEXT: testb $44, %sil # encoding: [0x40,0xf6,0xc6,0x2c]
; CHECK64-NEXT: je baz # TAILCALL
; CHECK64-NEXT: # encoding: [0x74,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
; CHECK64-NEXT: .LBB2_2: # %land.end
; CHECK64-NEXT: # kill: def $al killed $al killed $eax
; CHECK64-NEXT: retq # encoding: [0xc3]
; CHECK64-NEXT: .LBB2_3:
; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
;
; WIN64-LABEL: BlockPlacementTest:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
; WIN64-NEXT: je .LBB2_3 # encoding: [0x74,A]
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
; WIN64-NEXT: # %bb.1: # %land.rhs
; WIN64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
; WIN64-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
; WIN64-NEXT: je baz # TAILCALL
; WIN64-NEXT: # encoding: [0x74,A]
; WIN64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
; WIN64-NEXT: .LBB2_2: # %land.end
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq # encoding: [0xc3]
; WIN64-NEXT: .LBB2_3:
; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; WIN64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
entry:
%and = and i32 %x, 42
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %land.end, label %land.rhs
land.rhs:
%and6 = and i32 %x, 44
%tobool7 = icmp eq i32 %and6, 0
br i1 %tobool7, label %lor.rhs, label %land.end
lor.rhs:
%call = tail call x86_thiscallcc zeroext i1 @baz(i8* %this, i32 %x) #2
br label %land.end
land.end:
%0 = phi i1 [ false, %entry ], [ true, %land.rhs ], [ %call, %lor.rhs ]
ret i1 %0
; Make sure machine block placement isn't confused by the conditional tail call,
; but sees that it can fall through to the next block.
}
; Synthetic profile data for the conditional tail-call tests. !0 installs a
; "ProfileSummary" module flag in InstrProf format; !14 is the
; function_entry_count of 0 that @f, @f_non_leaf and @BlockPlacementTest carry
; as their !prof attachment.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -108,11 +108,11 @@ for.end:
ret void
}
define void @foo_nosize(i32 inreg %dns) {
; SLOW-LABEL: foo_nosize:
define void @foo_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: foo_pgso:
; SLOW: # %bb.0: # %entry
; SLOW-NEXT: movw $-1, %cx
; SLOW-NEXT: .p2align 4, 0x90
; SLOW-NEXT: xorl %ecx, %ecx
; SLOW-NEXT: decl %ecx
; SLOW-NEXT: .LBB4_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx
@ -122,10 +122,10 @@ define void @foo_nosize(i32 inreg %dns) {
; SLOW-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl
;
; FAST-LABEL: foo_nosize:
; FAST-LABEL: foo_pgso:
; FAST: # %bb.0: # %entry
; FAST-NEXT: movw $-1, %cx
; FAST-NEXT: .p2align 4, 0x90
; FAST-NEXT: xorl %ecx, %ecx
; FAST-NEXT: decl %ecx
; FAST-NEXT: .LBB4_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
@ -148,11 +148,11 @@ for.end:
ret void
}
define void @bar_nosize(i32 inreg %dns) {
; SLOW-LABEL: bar_nosize:
define void @bar_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: bar_pgso:
; SLOW: # %bb.0: # %entry
; SLOW-NEXT: movw $1, %cx
; SLOW-NEXT: .p2align 4, 0x90
; SLOW-NEXT: xorl %ecx, %ecx
; SLOW-NEXT: incl %ecx
; SLOW-NEXT: .LBB5_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx
@ -162,10 +162,10 @@ define void @bar_nosize(i32 inreg %dns) {
; SLOW-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl
;
; FAST-LABEL: bar_nosize:
; FAST-LABEL: bar_pgso:
; FAST: # %bb.0: # %entry
; FAST-NEXT: movw $1, %cx
; FAST-NEXT: .p2align 4, 0x90
; FAST-NEXT: xorl %ecx, %ecx
; FAST-NEXT: incl %ecx
; FAST-NEXT: .LBB5_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
@ -186,3 +186,99 @@ for.body:
for.end:
ret void
}
; Baseline without any size attribute or !prof attachment: a loop that
; decrements an i16 counter. Expected code materializes the initial value with
; movw $-1 and aligns the loop header (.p2align 4); the SLOW configuration
; steps with decl while the FAST configuration uses addl $-1.
define void @foo_nosize(i32 inreg %dns) {
; SLOW-LABEL: foo_nosize:
; SLOW: # %bb.0: # %entry
; SLOW-NEXT: movw $-1, %cx
; SLOW-NEXT: .p2align 4, 0x90
; SLOW-NEXT: .LBB6_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx
; SLOW-NEXT: decl %ecx
; SLOW-NEXT: cmpl %eax, %edx
; SLOW-NEXT: jl .LBB6_1
; SLOW-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl
;
; FAST-LABEL: foo_nosize:
; FAST: # %bb.0: # %entry
; FAST-NEXT: movw $-1, %cx
; FAST-NEXT: .p2align 4, 0x90
; FAST-NEXT: .LBB6_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
; FAST-NEXT: addl $-1, %ecx
; FAST-NEXT: cmpl %eax, %edx
; FAST-NEXT: jl .LBB6_1
; FAST-NEXT: # %bb.2: # %for.end
; FAST-NEXT: retl
entry:
br label %for.body
for.body:
%i.05 = phi i16 [ %dec, %for.body ], [ 0, %entry ]
%dec = add i16 %i.05, -1
%conv = zext i16 %dec to i32
%cmp = icmp slt i32 %conv, %dns
br i1 %cmp, label %for.body, label %for.end
for.end:
ret void
}
; Baseline without any size attribute or !prof attachment: a loop that
; increments an i16 counter. Expected code materializes the initial value with
; movw $1 and aligns the loop header (.p2align 4); the SLOW configuration steps
; with incl while the FAST configuration uses addl $1.
define void @bar_nosize(i32 inreg %dns) {
; SLOW-LABEL: bar_nosize:
; SLOW: # %bb.0: # %entry
; SLOW-NEXT: movw $1, %cx
; SLOW-NEXT: .p2align 4, 0x90
; SLOW-NEXT: .LBB7_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx
; SLOW-NEXT: incl %ecx
; SLOW-NEXT: cmpl %eax, %edx
; SLOW-NEXT: jl .LBB7_1
; SLOW-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl
;
; FAST-LABEL: bar_nosize:
; FAST: # %bb.0: # %entry
; FAST-NEXT: movw $1, %cx
; FAST-NEXT: .p2align 4, 0x90
; FAST-NEXT: .LBB7_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
; FAST-NEXT: addl $1, %ecx
; FAST-NEXT: cmpl %eax, %edx
; FAST-NEXT: jl .LBB7_1
; FAST-NEXT: # %bb.2: # %for.end
; FAST-NEXT: retl
entry:
br label %for.body
for.body:
%i.05 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
%inc = add i16 %i.05, 1
%conv = zext i16 %inc to i32
%cmp = icmp slt i32 %conv, %dns
br i1 %cmp, label %for.body, label %for.end
for.end:
ret void
}
; Synthetic profile data for the loop tests. !0 installs a "ProfileSummary"
; module flag in InstrProf format; !14 is the function_entry_count of 0 carried
; by the *_pgso functions as their !prof attachment.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -113,6 +113,38 @@ define <4 x float> @rcpss_full_size(<4 x float>* %a) optsize {
ret <4 x float> %res
}
; Scalar float load feeding the rcp.ss intrinsic, in a function with an entry
; count of 0 (!14). Expected code folds the load into the instruction's memory
; operand for both the SSE and AVX runs.
define float @rcpss_pgso(float* %a) !prof !14 {
; SSE-LABEL: rcpss_pgso:
; SSE: # %bb.0:
; SSE-NEXT: rcpss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: rcpss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load float, float* %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
%ext = extractelement <4 x float> %res, i32 0
ret float %ext
}
; Full <4 x float> load feeding the rcp.ss intrinsic, in a function with an
; entry count of 0 (!14). Expected code still folds the load into the
; instruction's memory operand for both the SSE and AVX runs.
define <4 x float> @rcpss_full_pgso(<4 x float>* %a) !prof !14 {
; SSE-LABEL: rcpss_full_pgso:
; SSE: # %bb.0:
; SSE-NEXT: rcpss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: rcpss_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, <4 x float>* %a
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld)
ret <4 x float> %res
}
define float @rsqrtss_size(float* %a) optsize {
; SSE-LABEL: rsqrtss_size:
; SSE: # %bb.0:
@ -145,6 +177,38 @@ define <4 x float> @rsqrtss_full_size(<4 x float>* %a) optsize {
ret <4 x float> %res
}
; Scalar float load feeding the rsqrt.ss intrinsic, in a function with an entry
; count of 0 (!14). Expected code folds the load into the instruction's memory
; operand for both the SSE and AVX runs.
define float @rsqrtss_pgso(float* %a) !prof !14 {
; SSE-LABEL: rsqrtss_pgso:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: rsqrtss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load float, float* %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
%ext = extractelement <4 x float> %res, i32 0
ret float %ext
}
; Full <4 x float> load feeding the rsqrt.ss intrinsic, in a function with an
; entry count of 0 (!14). Expected code still folds the load into the
; instruction's memory operand for both the SSE and AVX runs.
define <4 x float> @rsqrtss_full_pgso(<4 x float>* %a) !prof !14 {
; SSE-LABEL: rsqrtss_full_pgso:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: rsqrtss_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, <4 x float>* %a
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld)
ret <4 x float> %res
}
define float @sqrtss_size(float* %a) optsize{
; SSE-LABEL: sqrtss_size:
; SSE: # %bb.0:
@ -196,6 +260,57 @@ define <4 x float> @sqrtss_full_size_volatile(<4 x float>* %a) optsize{
ret <4 x float> %res
}
; Scalar float load feeding the sqrt.ss intrinsic, in a function with an entry
; count of 0 (!14). Expected code folds the load into the instruction's memory
; operand for both the SSE and AVX runs.
define float @sqrtss_pgso(float* %a) !prof !14 {
; SSE-LABEL: sqrtss_pgso:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load float, float* %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
%ext = extractelement <4 x float> %res, i32 0
ret float %ext
}
; Full <4 x float> load feeding the sqrt.ss intrinsic, in a function with an
; entry count of 0 (!14). Unlike the scalar-load case, the expected code keeps
; the vector load as a separate movaps/vmovaps and runs the square root on the
; register.
define <4 x float> @sqrtss_full_pgso(<4 x float>* %a) !prof !14 {
; SSE-LABEL: sqrtss_full_pgso:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtss_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps (%rdi), %xmm0
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, <4 x float>* %a
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
ret <4 x float> %res
}
; Volatile <4 x float> load feeding the sqrt.ss intrinsic, in a function with
; an entry count of 0 (!14). The expected code performs the full vector load as
; its own movaps/vmovaps and runs the square root on the register.
define <4 x float> @sqrtss_full_pgso_volatile(<4 x float>* %a) !prof !14 {
; SSE-LABEL: sqrtss_full_pgso_volatile:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtss_full_pgso_volatile:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps (%rdi), %xmm0
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load volatile <4 x float>, <4 x float>* %a
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
ret <4 x float> %res
}
define double @sqrtsd_size(double* %a) optsize {
; SSE-LABEL: sqrtsd_size:
; SSE: # %bb.0:
@ -247,7 +362,75 @@ define <2 x double> @sqrtsd_full_size_volatile(<2 x double>* %a) optsize {
ret <2 x double> %res
}
; Scalar double load feeding the sqrt.sd intrinsic, in a function with an entry
; count of 0 (!14). Expected code folds the load into the instruction's memory
; operand for both the SSE and AVX runs.
define double @sqrtsd_pgso(double* %a) !prof !14 {
; SSE-LABEL: sqrtsd_pgso:
; SSE: # %bb.0:
; SSE-NEXT: sqrtsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtsd_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load double, double* %a
%ins = insertelement <2 x double> undef, double %ld, i32 0
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
%ext = extractelement <2 x double> %res, i32 0
ret double %ext
}
; Full <2 x double> load feeding the sqrt.sd intrinsic, in a function with an
; entry count of 0 (!14). Unlike the scalar-load case, the expected code keeps
; the vector load as a separate movapd/vmovapd and runs the square root on the
; register.
define <2 x double> @sqrtsd_full_pgso(<2 x double>* %a) !prof !14 {
; SSE-LABEL: sqrtsd_full_pgso:
; SSE: # %bb.0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtsd_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <2 x double>, <2 x double>* %a
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
ret <2 x double> %res
}
; Volatile <2 x double> load feeding the sqrt.sd intrinsic, in a function with
; an entry count of 0 (!14). The expected code performs the full vector load as
; its own movapd/vmovapd and runs the square root on the register.
define <2 x double> @sqrtsd_full_pgso_volatile(<2 x double>* %a) !prof !14 {
; SSE-LABEL: sqrtsd_full_pgso_volatile:
; SSE: # %bb.0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtsd_full_pgso_volatile:
; AVX: # %bb.0:
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load volatile <2 x double>, <2 x double>* %a
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
ret <2 x double> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
; Synthetic profile data for the load-folding tests. !0 installs a
; "ProfileSummary" module flag in InstrProf format; !14 is the
; function_entry_count of 0 carried by the *_pgso functions as their !prof
; attachment.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -196,6 +196,26 @@ define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
ret i32 %tmp
}
; Variable-amount 32-bit funnel shift left in a function whose profile entry
; count is 0 (!prof !14). On both the 32-bit and 64-bit targets the expected
; code is a single double-precision shift (shldl) with the amount in %cl.
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldl %cl, %esi, %eax
; X64-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST: # %bb.0:
@ -216,36 +236,36 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-NEXT: shll %cl, %edi
; X86-FAST-NEXT: shldl %cl, %eax, %ebp
; X86-FAST-NEXT: testb $32, %bl
; X86-FAST-NEXT: je .LBB4_2
; X86-FAST-NEXT: je .LBB5_2
; X86-FAST-NEXT: # %bb.1:
; X86-FAST-NEXT: movl %edi, %ebp
; X86-FAST-NEXT: xorl %edi, %edi
; X86-FAST-NEXT: .LBB4_2:
; X86-FAST-NEXT: .LBB5_2:
; X86-FAST-NEXT: movb $64, %cl
; X86-FAST-NEXT: subb %bl, %cl
; X86-FAST-NEXT: movl %edx, %esi
; X86-FAST-NEXT: shrl %cl, %esi
; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB4_3
; X86-FAST-NEXT: jne .LBB5_3
; X86-FAST-NEXT: # %bb.4:
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-FAST-NEXT: testl %ebx, %ebx
; X86-FAST-NEXT: jne .LBB4_6
; X86-FAST-NEXT: jmp .LBB4_7
; X86-FAST-NEXT: .LBB4_3:
; X86-FAST-NEXT: jne .LBB5_6
; X86-FAST-NEXT: jmp .LBB5_7
; X86-FAST-NEXT: .LBB5_3:
; X86-FAST-NEXT: movl %esi, %ecx
; X86-FAST-NEXT: xorl %esi, %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: testl %ebx, %ebx
; X86-FAST-NEXT: je .LBB4_7
; X86-FAST-NEXT: .LBB4_6:
; X86-FAST-NEXT: je .LBB5_7
; X86-FAST-NEXT: .LBB5_6:
; X86-FAST-NEXT: orl %esi, %ebp
; X86-FAST-NEXT: orl %ecx, %edi
; X86-FAST-NEXT: movl %edi, %eax
; X86-FAST-NEXT: movl %ebp, %edx
; X86-FAST-NEXT: .LBB4_7:
; X86-FAST-NEXT: .LBB5_7:
; X86-FAST-NEXT: addl $4, %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
@ -279,11 +299,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SLOW-NEXT: testb %dl, %dl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: je .LBB4_2
; X86-SLOW-NEXT: je .LBB5_2
; X86-SLOW-NEXT: # %bb.1:
; X86-SLOW-NEXT: orl %eax, %ebp
; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB4_2:
; X86-SLOW-NEXT: .LBB5_2:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: movl %ebp, %eax
; X86-SLOW-NEXT: movl %ebx, %ecx
@ -294,41 +314,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SLOW-NEXT: negb %cl
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb %ch, %ch
; X86-SLOW-NEXT: je .LBB4_4
; X86-SLOW-NEXT: je .LBB5_4
; X86-SLOW-NEXT: # %bb.3:
; X86-SLOW-NEXT: orl %edi, %eax
; X86-SLOW-NEXT: movl %eax, %ebp
; X86-SLOW-NEXT: .LBB4_4:
; X86-SLOW-NEXT: .LBB5_4:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl %eax, %edi
; X86-SLOW-NEXT: movl %ebx, %ecx
; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB4_6
; X86-SLOW-NEXT: je .LBB5_6
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: movl %edi, %ebp
; X86-SLOW-NEXT: xorl %edi, %edi
; X86-SLOW-NEXT: .LBB4_6:
; X86-SLOW-NEXT: .LBB5_6:
; X86-SLOW-NEXT: movb %dh, %cl
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: testb $32, %dh
; X86-SLOW-NEXT: jne .LBB4_7
; X86-SLOW-NEXT: jne .LBB5_7
; X86-SLOW-NEXT: # %bb.8:
; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: jne .LBB4_10
; X86-SLOW-NEXT: jmp .LBB4_11
; X86-SLOW-NEXT: .LBB4_7:
; X86-SLOW-NEXT: jne .LBB5_10
; X86-SLOW-NEXT: jmp .LBB5_11
; X86-SLOW-NEXT: .LBB5_7:
; X86-SLOW-NEXT: movl %esi, %ecx
; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: je .LBB4_11
; X86-SLOW-NEXT: .LBB4_10:
; X86-SLOW-NEXT: je .LBB5_11
; X86-SLOW-NEXT: .LBB5_10:
; X86-SLOW-NEXT: orl %esi, %ebp
; X86-SLOW-NEXT: orl %ecx, %edi
; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, %eax
; X86-SLOW-NEXT: .LBB4_11:
; X86-SLOW-NEXT: .LBB5_11:
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-SLOW-NEXT: addl $8, %esp
; X86-SLOW-NEXT: popl %esi
@ -503,3 +523,20 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
ret i64 %tmp
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -195,6 +195,26 @@ define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
ret i32 %tmp
}
; Variable-amount 32-bit funnel shift right in a function whose profile entry
; count is 0 (!prof !14). On both the 32-bit and 64-bit targets the expected
; code is a single double-precision shift (shrdl) with the amount in %cl.
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %esi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrdl %cl, %edi, %eax
; X64-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST: # %bb.0:
@ -216,30 +236,30 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-NEXT: shll %cl, %edi
; X86-FAST-NEXT: shldl %cl, %eax, %esi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB4_2
; X86-FAST-NEXT: je .LBB5_2
; X86-FAST-NEXT: # %bb.1:
; X86-FAST-NEXT: movl %edi, %esi
; X86-FAST-NEXT: xorl %edi, %edi
; X86-FAST-NEXT: .LBB4_2:
; X86-FAST-NEXT: .LBB5_2:
; X86-FAST-NEXT: movl %edx, %ebp
; X86-FAST-NEXT: movl %ebx, %ecx
; X86-FAST-NEXT: shrl %cl, %ebp
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shrdl %cl, %edx, %eax
; X86-FAST-NEXT: testb $32, %bl
; X86-FAST-NEXT: je .LBB4_4
; X86-FAST-NEXT: je .LBB5_4
; X86-FAST-NEXT: # %bb.3:
; X86-FAST-NEXT: movl %ebp, %eax
; X86-FAST-NEXT: xorl %ebp, %ebp
; X86-FAST-NEXT: .LBB4_4:
; X86-FAST-NEXT: .LBB5_4:
; X86-FAST-NEXT: testl %ebx, %ebx
; X86-FAST-NEXT: je .LBB4_6
; X86-FAST-NEXT: je .LBB5_6
; X86-FAST-NEXT: # %bb.5:
; X86-FAST-NEXT: orl %ebp, %esi
; X86-FAST-NEXT: orl %eax, %edi
; X86-FAST-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-FAST-NEXT: movl %esi, %edx
; X86-FAST-NEXT: .LBB4_6:
; X86-FAST-NEXT: .LBB5_6:
; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-FAST-NEXT: addl $4, %esp
; X86-FAST-NEXT: popl %esi
@ -274,11 +294,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb %ch, %ch
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: je .LBB4_2
; X86-SLOW-NEXT: je .LBB5_2
; X86-SLOW-NEXT: # %bb.1:
; X86-SLOW-NEXT: orl %edi, %edx
; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB4_2:
; X86-SLOW-NEXT: .LBB5_2:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl %ebx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edx
@ -290,41 +310,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: testb %ah, %ah
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: je .LBB4_4
; X86-SLOW-NEXT: je .LBB5_4
; X86-SLOW-NEXT: # %bb.3:
; X86-SLOW-NEXT: orl %edx, %edi
; X86-SLOW-NEXT: movl %edi, %ebp
; X86-SLOW-NEXT: .LBB4_4:
; X86-SLOW-NEXT: .LBB5_4:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl %ebx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB4_6
; X86-SLOW-NEXT: je .LBB5_6
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: movl %edi, %ebp
; X86-SLOW-NEXT: xorl %edi, %edi
; X86-SLOW-NEXT: .LBB4_6:
; X86-SLOW-NEXT: .LBB5_6:
; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: jne .LBB4_7
; X86-SLOW-NEXT: jne .LBB5_7
; X86-SLOW-NEXT: # %bb.8:
; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: jne .LBB4_10
; X86-SLOW-NEXT: jmp .LBB4_11
; X86-SLOW-NEXT: .LBB4_7:
; X86-SLOW-NEXT: jne .LBB5_10
; X86-SLOW-NEXT: jmp .LBB5_11
; X86-SLOW-NEXT: .LBB5_7:
; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: je .LBB4_11
; X86-SLOW-NEXT: .LBB4_10:
; X86-SLOW-NEXT: je .LBB5_11
; X86-SLOW-NEXT: .LBB5_10:
; X86-SLOW-NEXT: orl %ebp, %esi
; X86-SLOW-NEXT: orl %edi, %eax
; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %eax, %edx
; X86-SLOW-NEXT: .LBB4_11:
; X86-SLOW-NEXT: .LBB5_11:
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: addl $8, %esp
; X86-SLOW-NEXT: popl %esi
@ -498,3 +518,20 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
%tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 7)
ret i64 %tmp
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -1983,6 +1983,80 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize {
ret float %x230
}
; Partial float reduction on a <4 x float> in a function with profile entry
; count 0 (!prof !14): elements [2,3] are shuffled down and added, then
; element 1 is shuffled to lane 0 and added, and lane 0 is returned.
; The expected code performs the last step with a horizontal add.
define float @hadd32_4_pgso(<4 x float> %x225) !prof !14 {
; SSE3-LABEL: hadd32_4_pgso:
; SSE3: # %bb.0:
; SSE3-NEXT: movaps %xmm0, %xmm1
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE3-NEXT: addps %xmm0, %xmm1
; SSE3-NEXT: haddps %xmm1, %xmm1
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; AVX-LABEL: hadd32_4_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%x227 = fadd <4 x float> %x225, %x226
%x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%x229 = fadd <4 x float> %x227, %x228
%x230 = extractelement <4 x float> %x229, i32 0
ret float %x230
}
; <8 x float> version of hadd32_4_pgso (profile entry count 0 via !prof !14).
; Only the low four lanes feed the result, so the expected code works on xmm
; registers and ends with a horizontal add; the AVX output additionally
; contains a vzeroupper before returning.
define float @hadd32_8_pgso(<8 x float> %x225) !prof !14 {
; SSE3-LABEL: hadd32_8_pgso:
; SSE3: # %bb.0:
; SSE3-NEXT: movaps %xmm0, %xmm1
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE3-NEXT: addps %xmm0, %xmm1
; SSE3-NEXT: haddps %xmm1, %xmm1
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; AVX-LABEL: hadd32_8_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x227 = fadd <8 x float> %x225, %x226
%x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x229 = fadd <8 x float> %x227, %x228
%x230 = extractelement <8 x float> %x229, i32 0
ret float %x230
}
; <16 x float> version of hadd32_4_pgso (profile entry count 0 via !prof !14).
; Only the low four lanes feed the result, so the expected code works on xmm
; registers and ends with a horizontal add; the AVX output additionally
; contains a vzeroupper before returning.
define float @hadd32_16_pgso(<16 x float> %x225) !prof !14 {
; SSE3-LABEL: hadd32_16_pgso:
; SSE3: # %bb.0:
; SSE3-NEXT: movaps %xmm0, %xmm1
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE3-NEXT: addps %xmm0, %xmm1
; SSE3-NEXT: haddps %xmm1, %xmm1
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; AVX-LABEL: hadd32_16_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x227 = fadd <16 x float> %x225, %x226
%x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x229 = fadd <16 x float> %x227, %x228
%x230 = extractelement <16 x float> %x229, i32 0
ret float %x230
}
define float @partial_reduction_fadd_v8f32(<8 x float> %x) {
; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32:
; SSE3-SLOW: # %bb.0:
@ -2115,3 +2189,20 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) {
%r = extractelement <16 x float> %x0123, i32 0
ret float %r
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -73,6 +73,68 @@ if.end: ; preds = %if.then, %entry
ret i32 0
}
; Test PGSO to make sure immediates with multiple users don't get pulled in to
; instructions.
; The function is marked cold through !prof !14 (entry count 0). In the entry
; block 1234 (stored to @a and @b) and 12 (stored to @c and compared with @e)
; are each expected to be materialized once in %eax and reused. In %if.end,
; 1234 has a single user and is expected as a direct immediate store, while
; 555 (stored to @h and added to @i) is expected to go through a register.
define i32 @foo_pgso() !prof !14 {
; X86-LABEL: foo_pgso:
; X86: # %bb.0: # %entry
; X86-NEXT: movl $1234, %eax # imm = 0x4D2
; X86-NEXT: movl %eax, a
; X86-NEXT: movl %eax, b
; X86-NEXT: movl $12, %eax
; X86-NEXT: movl %eax, c
; X86-NEXT: cmpl %eax, e
; X86-NEXT: jne .LBB1_2
; X86-NEXT: # %bb.1: # %if.then
; X86-NEXT: movl $1, x
; X86-NEXT: .LBB1_2: # %if.end
; X86-NEXT: movl $1234, f # imm = 0x4D2
; X86-NEXT: movl $555, %eax # imm = 0x22B
; X86-NEXT: movl %eax, h
; X86-NEXT: addl %eax, i
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: foo_pgso:
; X64: # %bb.0: # %entry
; X64-NEXT: movl $1234, %eax # imm = 0x4D2
; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl $12, %eax
; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: cmpl %eax, {{.*}}(%rip)
; X64-NEXT: jne .LBB1_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: movl $1, {{.*}}(%rip)
; X64-NEXT: .LBB1_2: # %if.end
; X64-NEXT: movl $1234, {{.*}}(%rip) # imm = 0x4D2
; X64-NEXT: movl $555, %eax # imm = 0x22B
; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: addl %eax, {{.*}}(%rip)
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
store i32 1234, i32* @a
store i32 1234, i32* @b
store i32 12, i32* @c
%0 = load i32, i32* @e
%cmp = icmp eq i32 %0, 12
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
store i32 1, i32* @x
br label %if.end
; New block.. Make sure 1234 isn't live across basic blocks from before.
if.end: ; preds = %if.then, %entry
store i32 1234, i32* @f
store i32 555, i32* @h
%1 = load i32, i32* @i
%add1 = add nsw i32 %1, 555
store i32 %add1, i32* @i
ret i32 0
}
; Test -O2 to make sure that all immediates get pulled in to their users.
define i32 @foo2() {
; X86-LABEL: foo2:
@ -124,3 +186,47 @@ entry:
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @AA, i32 0, i32 0), i8 33, i32 24, i1 false)
ret void
}
; memset gets lowered in DAG. Constant merging should hoist all the
; immediates used to store to the individual memory locations. Make
; sure we don't directly store the immediates.
; This variant is marked cold through !prof !14 (entry count 0). The memset
; writes 24 bytes of value 33 (0x21) to @AA; the expected code loads the
; splatted byte pattern into a register once and stores from that register.
define void @foomemset_pgso() !prof !14 {
; X86-LABEL: foomemset_pgso:
; X86: # %bb.0: # %entry
; X86-NEXT: movl $555819297, %eax # imm = 0x21212121
; X86-NEXT: movl %eax, AA+20
; X86-NEXT: movl %eax, AA+16
; X86-NEXT: movl %eax, AA+12
; X86-NEXT: movl %eax, AA+8
; X86-NEXT: movl %eax, AA+4
; X86-NEXT: movl %eax, AA
; X86-NEXT: retl
;
; X64-LABEL: foomemset_pgso:
; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $2387225703656530209, %rax # imm = 0x2121212121212121
; X64-NEXT: movq %rax, AA+{{.*}}(%rip)
; X64-NEXT: movq %rax, AA+{{.*}}(%rip)
; X64-NEXT: movq %rax, {{.*}}(%rip)
; X64-NEXT: retq
entry:
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @AA, i32 0, i32 0), i8 33, i32 24, i1 false)
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -19,6 +19,19 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
ret i1 %cmp
}
; The 64-bit immediate -1 has two users here: a store through %b and a
; comparison against %a. The function is marked cold through !prof !14 (entry
; count 0); the expected code materializes -1 once in %rax and uses the
; register for both the store and the compare.
define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 {
; CHECK-LABEL: imm_multiple_users_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
store i64 -1, i64* %b, align 8
%cmp = icmp eq i64 %a, -1
ret i1 %cmp
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
; Inlined memsets requiring multiple same-sized stores should be lowered using
@ -34,3 +47,31 @@ define void @memset_zero(i8* noalias nocapture %D) optsize {
tail call void @llvm.memset.p0i8.i64(i8* %D, i8 0, i64 15, i1 false)
ret void
}
; 15-byte zeroing memset in a function marked cold through !prof !14 (entry
; count 0). The expected code zeroes %eax once and issues two 8-byte stores
; from the register, at offsets 7 and 0, which overlap by one byte to cover
; the 15 bytes.
define void @memset_zero_pgso(i8* noalias nocapture %D) !prof !14 {
; CHECK-LABEL: memset_zero_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movq %rax, 7(%rdi)
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
tail call void @llvm.memset.p0i8.i64(i8* %D, i8 0, i64 15, i1 false)
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -269,6 +269,35 @@ exit:
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
; Loop-layout test for a function marked cold through !prof !14 (entry count
; 0). The loop header calls @body and has two back edges (from %bb and from
; %block_a). The assertions below require that no unconditional jump to the
; loop header label is emitted before that label, and that the call to body
; is the first instruction after it.
; CHECK-LABEL: slightly_more_involved_2_pgso:
; CHECK-NOT: jmp .LBB6_1
; CHECK: .LBB6_1:
; CHECK-NEXT: callq body
define void @slightly_more_involved_2_pgso() norecurse nounwind readnone uwtable !prof !14 {
entry:
br label %loop
loop:
call void @body()
%t0 = call i32 @get()
%t1 = icmp slt i32 %t0, 2
br i1 %t1, label %block_a, label %bb
bb:
%t2 = call i32 @get()
%t3 = icmp slt i32 %t2, 99
br i1 %t3, label %exit, label %loop
block_a:
call void @bar99()
br label %loop
exit:
call void @exit()
ret void
}
declare void @bar99() nounwind
declare void @bar100() nounwind
declare void @bar101() nounwind
@ -281,3 +310,20 @@ declare i32 @get() nounwind
declare void @block_a_true_func() nounwind
declare void @block_a_false_func() nounwind
declare void @block_a_merge_func() nounwind
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -30,6 +30,21 @@ entry:
; CHECK64-NEXT: retq
}
; Returns the constant 1 from a function marked cold through !prof !14 (entry
; count 0). The 32-bit expectation is the xor + inc sequence; the 64-bit
; expectation is still a plain immediate move (see the FIXME below).
define i32 @one32_pgso() !prof !14 {
entry:
ret i32 1
; CHECK32-LABEL: one32_pgso:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: retl
; FIXME: Figure out the best approach in 64-bit mode.
; CHECK64-LABEL: one32_pgso:
; CHECK64: movl $1, %eax
; CHECK64-NEXT: retq
}
define i32 @one32_minsize() minsize {
entry:
ret i32 1
@ -107,6 +122,16 @@ entry:
; CHECK32-NEXT: retl
}
; Returns the constant -1 from a function marked cold through !prof !14
; (entry count 0). The 32-bit expectation is the xor + dec sequence.
define i32 @minus_one32_pgso() !prof !14 {
entry:
ret i32 -1
; CHECK32-LABEL: minus_one32_pgso:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: retl
}
define i32 @minus_one32_minsize() minsize {
entry:
ret i32 -1
@ -140,6 +165,28 @@ entry:
; CHECK32-NEXT: retl
}
; 16-bit version of one32_pgso (cold through !prof !14, entry count 0). The
; expected code still uses the 32-bit xor + inc on %eax, followed by a
; register "kill" annotation before the return.
define i16 @one16_pgso() !prof !14 {
entry:
ret i16 1
; CHECK32-LABEL: one16_pgso:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}
; 16-bit version of minus_one32_pgso (cold through !prof !14, entry count 0).
; The expected code still uses the 32-bit xor + dec on %eax, followed by a
; register "kill" annotation before the return.
define i16 @minus_one16_pgso() !prof !14 {
entry:
ret i16 -1
; CHECK32-LABEL: minus_one16_pgso:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}
define i32 @minus_five32() minsize {
entry:
ret i32 -5
@ -213,4 +260,72 @@ entry:
; CHECK32: retl
}
; Rematerialization test in a function marked cold through !prof !14 (entry
; count 0): -1 is needed before the call (in %ecx) and again as the return
; value after an inline asm that clobbers every general-purpose register
; except %esp. Both uses are expected to be produced with xor + dec.
define i32 @rematerialize_minus_one_pgso() !prof !14 {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; -1 should be re-materialized here instead of getting spilled above.
ret i32 -1
; CHECK32-LABEL: rematerialize_minus_one_pgso
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NOT: %eax
; CHECK32: retl
}
; Like rematerialize_minus_one_pgso (cold through !prof !14, entry count 0),
; but the second -1 is needed while the flags produced by the compare are
; still live for the conditional move. The first -1 (before the call) is
; expected as xor + dec; the second must be a plain immediate move, and no
; xor may appear between the setne and that move.
define i32 @rematerialize_minus_one_eflags_pgso(i32 %x) !prof !14 {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; Define eflags.
%a = icmp ne i32 %x, 123
%b = zext i1 %a to i32
; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
; It must therefore not use the xor-dec lowering.
%c = select i1 %a, i32 %b, i32 -1
ret i32 %c
; CHECK32-LABEL: rematerialize_minus_one_eflags_pgso
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: cmpl
; CHECK32: setne
; CHECK32-NOT: xorl
; CHECK32: movl $-1
; CHECK32: cmov
; CHECK32: retl
}
declare x86_thiscallcc void @f(i32)
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

File diff suppressed because it is too large Load Diff

View File

@ -139,6 +139,36 @@ entry:
ret void
}
; 64-byte memcpy with no alignment information, in a noredzone function
; marked cold through !prof !14 (entry count 0). The two target
; configurations expect different lowerings: the Linux run expects a tail
; call to the memcpy library function with the length in %edx, while the
; Darwin run expects an inline expansion into eight 8-byte load/store pairs.
define void @test3_pgso(i8* nocapture %A, i8* nocapture %B) nounwind noredzone !prof !14 {
; LINUX-LABEL: test3_pgso:
; LINUX: # %bb.0: # %entry
; LINUX-NEXT: movl $64, %edx
; LINUX-NEXT: jmp memcpy # TAILCALL
;
; DARWIN-LABEL: test3_pgso:
; DARWIN: ## %bb.0: ## %entry
; DARWIN-NEXT: movq 56(%rsi), %rax
; DARWIN-NEXT: movq %rax, 56(%rdi)
; DARWIN-NEXT: movq 48(%rsi), %rax
; DARWIN-NEXT: movq %rax, 48(%rdi)
; DARWIN-NEXT: movq 40(%rsi), %rax
; DARWIN-NEXT: movq %rax, 40(%rdi)
; DARWIN-NEXT: movq 32(%rsi), %rax
; DARWIN-NEXT: movq %rax, 32(%rdi)
; DARWIN-NEXT: movq 24(%rsi), %rax
; DARWIN-NEXT: movq %rax, 24(%rdi)
; DARWIN-NEXT: movq 16(%rsi), %rax
; DARWIN-NEXT: movq %rax, 16(%rdi)
; DARWIN-NEXT: movq (%rsi), %rax
; DARWIN-NEXT: movq 8(%rsi), %rcx
; DARWIN-NEXT: movq %rcx, 8(%rdi)
; DARWIN-NEXT: movq %rax, (%rdi)
; DARWIN-NEXT: retq
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i1 false)
ret void
}
define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
; DARWIN-LABEL: test3_minsize:
; DARWIN: ## %bb.0:
@ -506,3 +536,20 @@ define void @addrspace256(i8 addrspace(256)* %a, i8 addrspace(256)* %b) nounwind
tail call void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* align 8 %a, i8 addrspace(256)* align 8 %b, i64 16, i1 false)
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -86,6 +86,39 @@ define double @pow_wrapper_optsize(double %a) optsize {
ret double %ret
}
; powi with the constant exponent 15 in a function marked cold through
; !prof !14 (entry count 0). All three configurations expect a call to the
; __powidf2 runtime routine with 15 as the exponent argument (a tail call on
; the 64-bit target) rather than an inline expansion.
define double @pow_wrapper_pgso(double %a) !prof !14 {
; X86-X87-LABEL: pow_wrapper_pgso:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: .cfi_def_cfa_offset 16
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fstpl (%esp)
; X86-X87-NEXT: movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT: calll __powidf2
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: .cfi_def_cfa_offset 4
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: pow_wrapper_pgso:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: calll __powidf2
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; X64-LABEL: pow_wrapper_pgso:
; X64: # %bb.0:
; X64-NEXT: movl $15, %edi
; X64-NEXT: jmp __powidf2 # TAILCALL
%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
ret double %ret
}
define double @pow_wrapper_minsize(double %a) minsize {
; X86-X87-LABEL: pow_wrapper_minsize:
; X86-X87: # %bb.0:
@ -124,3 +157,19 @@ define double @pow_wrapper_minsize(double %a) minsize {
declare double @llvm.powi.f64(double, i32) nounwind readonly
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -252,3 +252,60 @@ define double @test12(double* %xptr) nounwind optsize {
%call = tail call double @trunc(double %x) nounwind readnone
ret double %call
}
; truncf of a float loaded from memory, in a function marked cold through
; !prof !14 (entry count 0). Every configuration expects a single round
; instruction (immediate 11) whose source operand is the memory location
; itself, i.e. the load is folded into the rounding instruction.
define float @test11_pgso(float* %xptr) nounwind !prof !14 {
; CHECK-SSE-LABEL: test11_pgso:
; CHECK-SSE: ## %bb.0:
; CHECK-SSE-NEXT: roundss $11, (%rdi), %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test11_pgso:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test11_pgso:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load float, float* %xptr
%call = tail call float @truncf(float %x) nounwind readnone
ret float %call
}
; Double-precision counterpart of test11_pgso: trunc of a double loaded from
; memory, in a function marked cold through !prof !14 (entry count 0). Every
; configuration expects a single round instruction (immediate 11) with the
; load folded into its memory operand.
define double @test12_pgso(double* %xptr) nounwind !prof !14 {
; CHECK-SSE-LABEL: test12_pgso:
; CHECK-SSE: ## %bb.0:
; CHECK-SSE-NEXT: roundsd $11, (%rdi), %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test12_pgso:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test12_pgso:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load double, double* %xptr
%call = tail call double @trunc(double %x) nounwind readnone
ret double %call
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -0,0 +1,321 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
declare void @bar()
; Loads an i32, masks it to its low byte and compares against 47, calling
; @bar when equal. The function is marked cold through !prof !14 (entry count
; 0). The expected code narrows this to a byte compare directly against
; memory and turns the call into a conditional tail call.
define void @test1(i32* nocapture %X) nounwind !prof !14 {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $47, (%rdi)
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%tmp1 = load i32, i32* %X, align 4
%and = and i32 %tmp1, 255
%cmp = icmp eq i32 %and, 47
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Register-operand version of test1: the low byte of the i32 argument is
; compared against 47. The function is marked cold through !prof !14 (entry
; count 0). The expected code is a byte compare on %dil followed by a
; conditional tail call to bar.
define void @test2(i32 %X) nounwind !prof !14 {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $47, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 47
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Like test2, but the masked low byte is compared against 255. The function
; is marked cold through !prof !14 (entry count 0). In the expected byte
; compare the constant appears as the signed 8-bit immediate -1.
define void @test3(i32 %X) nounwind !prof !14 {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $-1, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 255
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; PR16083
; Returns true when %b is non-zero; otherwise returns the result of testing
; (0 & %a) != 0. Because one operand of the 'and' is the constant 0, the
; %lor.rhs path is expected to simply produce 0 (via xor) with no compare
; on %a.
; NOTE(review): unlike the other functions in this file, this one carries no
; !prof annotation - confirm whether that is intentional.
define i1 @test4(i64 %a, i32 %b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB3_1
; CHECK-NEXT: # %bb.2: # %lor.end
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB3_1: # %lor.rhs
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
%tobool = icmp ne i32 %b, 0
br i1 %tobool, label %lor.end, label %lor.rhs
lor.rhs: ; preds = %entry
%and = and i64 0, %a
%tobool1 = icmp ne i64 %and, 0
br label %lor.end
lor.end: ; preds = %lor.rhs, %entry
%p = phi i1 [ true, %entry ], [ %tobool1, %lor.rhs ]
ret i1 %p
}
@x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4
; PR16551
; Loads an i56 from @x, shifts it right by 32 and truncates to i32 (leaving the
; top 24 bits of the load), then tail-calls @bar unless that value equals 1.
; The expected assembly rebuilds the value from a byte load shifted left by 16
; OR'd with a 16-bit load. Carries !prof !14 (function_entry_count 0).
define void @test5(i32 %X) nounwind !prof !14 {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl x+{{.*}}(%rip), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movzwl x+{{.*}}(%rip), %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: cmpl $1, %ecx
; CHECK-NEXT: jne bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
%bf.lshr = lshr i56 %bf.load, 32
%bf.cast = trunc i56 %bf.lshr to i32
%cmp = icmp ne i32 %bf.cast, 1
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Masks %X with 255 and compares with 256, a value the masked result cannot
; take. The expected assembly still emits the compare, zero-extending the low
; byte and using a 32-bit `cmpl $256` instead of a byte compare.
; Carries !prof !14 (function_entry_count 0).
define void @test2_1(i32 %X) nounwind !prof !14 {
; CHECK-LABEL: test2_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: cmpl $256, %eax # imm = 0x100
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 256
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with 1. The expected
; assembly compares the byte register directly (`cmpb $1, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_1(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $1, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 1
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with 47. The expected
; assembly compares the byte register directly (`cmpb $47, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_47(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_47:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $47, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 47
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with 127. The expected
; assembly compares the byte register directly (`cmpb $127, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_127(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_127:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $127, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 127
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with -1. The expected
; assembly compares the byte register directly (`cmpb $-1, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_neg1(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_neg1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $-1, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -1
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with -2. The expected
; assembly compares the byte register directly (`cmpb $-2, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_neg2(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_neg2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $-2, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -2
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with -127. The expected
; assembly compares the byte register directly (`cmpb $-127, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_neg127(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_neg127:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $-127, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -127
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with -128. The expected
; assembly compares the byte register directly (`cmpb $-128, %dil`).
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_neg128(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_neg128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb $-128, %dil
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -128
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Sign-extends an i8 argument to i32 and compares it with 255. A sign-extended
; i8 lies in [-128, 127], so the compare can never succeed; the expected
; assembly does not read %dil at all and instead tests a constant in %al.
; Carries !prof !14 (function_entry_count 0).
define void @test_sext_i8_icmp_255(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_255:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 255
br i1 %cmp, label %if.then, label %if.end
if.then:
tail call void @bar() nounwind
br label %if.end
if.end:
ret void
}
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachments on functions in this file.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -54,6 +54,26 @@ define i32 @dec_size(i32 %x) optsize {
ret i32 %r
}
; Adds 1 to %x in a function carrying !prof !14 (function_entry_count 0).
; The expected assembly uses `incl` for the addition.
define i32 @inc_pgso(i32 %x) !prof !14 {
; CHECK-LABEL: inc_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: incl %eax
; CHECK-NEXT: retl
%r = add i32 %x, 1
ret i32 %r
}
; Adds -1 to %x in a function carrying !prof !14 (function_entry_count 0).
; The expected assembly uses `decl` for the subtraction.
define i32 @dec_pgso(i32 %x) !prof !14 {
; CHECK-LABEL: dec_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: decl %eax
; CHECK-NEXT: retl
%r = add i32 %x, -1
ret i32 %r
}
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
declare void @other(i32* ) nounwind;
@ -62,20 +82,20 @@ define void @cond_ae_to_cond_ne(i32* %p) nounwind {
; INCDEC: # %bb.0: # %entry
; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax
; INCDEC-NEXT: incl (%eax)
; INCDEC-NEXT: jne .LBB4_1
; INCDEC-NEXT: jne .LBB6_1
; INCDEC-NEXT: # %bb.2: # %if.end4
; INCDEC-NEXT: jmp other # TAILCALL
; INCDEC-NEXT: .LBB4_1: # %return
; INCDEC-NEXT: .LBB6_1: # %return
; INCDEC-NEXT: retl
;
; ADD-LABEL: cond_ae_to_cond_ne:
; ADD: # %bb.0: # %entry
; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax
; ADD-NEXT: addl $1, (%eax)
; ADD-NEXT: jne .LBB4_1
; ADD-NEXT: jne .LBB6_1
; ADD-NEXT: # %bb.2: # %if.end4
; ADD-NEXT: jmp other # TAILCALL
; ADD-NEXT: .LBB4_1: # %return
; ADD-NEXT: .LBB6_1: # %return
; ADD-NEXT: retl
entry:
%t0 = load i32, i32* %p, align 8
@ -109,10 +129,10 @@ define void @test_tail_call(i32* %ptr) nounwind {
; INCDEC-NEXT: incb a
; INCDEC-NEXT: sete d
; INCDEC-NEXT: testb %al, %al
; INCDEC-NEXT: jne .LBB5_2
; INCDEC-NEXT: jne .LBB7_2
; INCDEC-NEXT: # %bb.1: # %then
; INCDEC-NEXT: jmp external_a # TAILCALL
; INCDEC-NEXT: .LBB5_2: # %else
; INCDEC-NEXT: .LBB7_2: # %else
; INCDEC-NEXT: jmp external_b # TAILCALL
;
; ADD-LABEL: test_tail_call:
@ -123,10 +143,10 @@ define void @test_tail_call(i32* %ptr) nounwind {
; ADD-NEXT: addb $1, a
; ADD-NEXT: sete d
; ADD-NEXT: testb %al, %al
; ADD-NEXT: jne .LBB5_2
; ADD-NEXT: jne .LBB7_2
; ADD-NEXT: # %bb.1: # %then
; ADD-NEXT: jmp external_a # TAILCALL
; ADD-NEXT: .LBB5_2: # %else
; ADD-NEXT: .LBB7_2: # %else
; ADD-NEXT: jmp external_b # TAILCALL
entry:
%val = load i32, i32* %ptr
@ -152,3 +172,19 @@ else:
ret void
}
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachments on the *_pgso functions.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -17,6 +17,17 @@ define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
ret <2 x double> %add
}
; Adds the splat constant <1.0, 1.0> to a <2 x double>. The function carries
; !prof !14 (function_entry_count 0) rather than a size attribute. The
; expected assembly materialises the constant with vmovddup before vaddpd.
define <2 x double> @splat_v2f64_pgso(<2 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v2f64_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT: # xmm1 = mem[0,0]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%add = fadd <2 x double> %x, <double 1.0, double 1.0>
ret <2 x double> %add
}
define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK: # %bb.0:
@ -27,6 +38,16 @@ define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
ret <4 x double> %add
}
; Adds a splat of 1.0 to a <4 x double> in a function carrying !prof !14
; (function_entry_count 0). The expected assembly materialises the constant
; with vbroadcastsd into a ymm register before vaddpd.
define <4 x double> @splat_v4f64_pgso(<4 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v4f64_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
ret <4 x double> %add
}
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: # %bb.0:
@ -37,6 +58,16 @@ define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
ret <4 x float> %add
}
; Adds a splat of 1.0 to a <4 x float> in a function carrying !prof !14
; (function_entry_count 0). The expected assembly materialises the constant
; with vbroadcastss into an xmm register before vaddps.
define <4 x float> @splat_v4f32_pgso(<4 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v4f32_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
ret <4 x float> %add
}
define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK: # %bb.0:
@ -47,6 +78,16 @@ define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
ret <8 x float> %add
}
; Adds a splat of 1.0 to an <8 x float> in a function carrying !prof !14
; (function_entry_count 0). The expected assembly materialises the constant
; with vbroadcastss into a ymm register before vaddps.
define <8 x float> @splat_v8f32_pgso(<8 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v8f32_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
ret <8 x float> %add
}
; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
@ -66,6 +107,23 @@ define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
ret <2 x i64> %add
}
; Adds a splat of 2 to a <2 x i64> in a function carrying !prof !14
; (function_entry_count 0). Expectations differ per run prefix: the AVX
; prefix expects vmovddup for the constant, the AVX2 prefix vpbroadcastq.
define <2 x i64> @splat_v2i64_pgso(<2 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v2i64_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v2i64_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%add = add <2 x i64> %x, <i64 2, i64 2>
ret <2 x i64> %add
}
; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
@ -88,6 +146,26 @@ define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
ret <4 x i64> %add
}
; Adds a splat of 2 to a <4 x i64> in a function carrying !prof !14
; (function_entry_count 0). Under the AVX prefix the add is split into two
; 128-bit vpaddq operations sharing one vmovddup constant; under the AVX2
; prefix a single 256-bit vpaddq follows a vpbroadcastq.
define <4 x i64> @splat_v4i64_pgso(<4 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v4i64_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT: # xmm2 = mem[0,0]
; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v4i64_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %add
}
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
@ -105,6 +183,22 @@ define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
ret <4 x i32> %add
}
; Adds a splat of 2 to a <4 x i32> in a function carrying !prof !14
; (function_entry_count 0). The AVX prefix expects vbroadcastss for the
; constant, the AVX2 prefix vpbroadcastd.
define <4 x i32> @splat_v4i32_pgso(<4 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v4i32_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v4i32_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %add
}
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
@ -125,6 +219,25 @@ define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
ret <8 x i32> %add
}
; Adds a splat of 2 to an <8 x i32> in a function carrying !prof !14
; (function_entry_count 0). Under the AVX prefix the add is split into two
; 128-bit vpaddd operations sharing one vbroadcastss constant; under the AVX2
; prefix a single 256-bit vpaddd follows a vpbroadcastd.
define <8 x i32> @splat_v8i32_pgso(<8 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v8i32_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v8i32_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
ret <8 x i32> %add
}
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
@ -141,6 +254,21 @@ define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
ret <8 x i16> %add
}
; Adds a splat of 2 to an <8 x i16> in a function carrying !prof !14
; (function_entry_count 0). The AVX prefix expects vpaddw with a RIP-relative
; memory operand (no separate constant load); the AVX2 prefix expects a
; vpbroadcastw followed by a register-register vpaddw.
define <8 x i16> @splat_v8i16_pgso(<8 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v8i16_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v8i16_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <8 x i16> %add
}
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
@ -161,6 +289,25 @@ define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
ret <16 x i16> %add
}
; Adds a splat of 2 to a <16 x i16> in a function carrying !prof !14
; (function_entry_count 0). Under the AVX prefix the add is split into two
; 128-bit vpaddw operations using a constant loaded with vmovdqa; under the
; AVX2 prefix a single 256-bit vpaddw follows a vpbroadcastw.
define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v16i16_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v16i16_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %add
}
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
@ -177,6 +324,21 @@ define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
ret <16 x i8> %add
}
; Adds a splat of 2 to a <16 x i8> in a function carrying !prof !14
; (function_entry_count 0). The AVX prefix expects vpaddb with a RIP-relative
; memory operand (no separate constant load); the AVX2 prefix expects a
; vpbroadcastb followed by a register-register vpaddb.
define <16 x i8> @splat_v16i8_pgso(<16 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v16i8_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v16i8_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <16 x i8> %add
}
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
@ -197,6 +359,25 @@ define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
ret <32 x i8> %add
}
; Adds a splat of 2 to a <32 x i8> in a function carrying !prof !14
; (function_entry_count 0). Under the AVX prefix the add is split into two
; 128-bit vpaddb operations using a constant loaded with vmovdqa; under the
; AVX2 prefix a single 256-bit vpaddb follows a vpbroadcastb.
define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v32i8_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v32i8_pgso:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %add
}
; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.
@ -238,3 +419,20 @@ entry:
attributes #0 = { optsize }
attributes #1 = { minsize }
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachments on the *_pgso functions.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -19,6 +19,23 @@ entry:
}
; Stores the i32 constant 0 through %p in a function carrying !prof !14
; (function_entry_count 0). Both the 32-bit and 64-bit expectations use a
; `movl $0` store to memory.
define void @zero_pgso(i32* %p) !prof !14 {
; CHECK32-LABEL: zero_pgso:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movl $0, (%eax)
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: zero_pgso:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movl $0, (%rdi)
; CHECK64-NEXT: retq
entry:
store i32 0, i32* %p
ret void
}
define void @minus_one_optsize(i32* %p) optsize {
; CHECK32-LABEL: minus_one_optsize:
; CHECK32: # %bb.0: # %entry
@ -36,6 +53,22 @@ entry:
}
; Stores the i32 constant -1 through %p in a function carrying !prof !14
; (function_entry_count 0). Both the 32-bit and 64-bit expectations use a
; `movl $-1` store to memory.
define void @minus_one_pgso(i32* %p) !prof !14 {
; CHECK32-LABEL: minus_one_pgso:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movl $-1, (%eax)
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: minus_one_pgso:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movl $-1, (%rdi)
; CHECK64-NEXT: retq
entry:
store i32 -1, i32* %p
ret void
}
define void @zero_64(i64* %p) minsize {
; CHECK32-LABEL: zero_64:
@ -244,3 +277,20 @@ entry:
store volatile i16 -1, i16* %p
ret void
}
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachments on the *_pgso functions.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -79,3 +79,72 @@ return: ret void
; CHECK: ja
; CHECK: jmpq *.LJTI
}
; Switch over the five case values 4, 8, 12, 16 and 20 in a function marked
; optsize. The expectations at the end of the body look for a sequence of
; cmpl instructions (compare-and-branch lowering) ending in retq.
define void @dense_optsize(i32 %x) optsize {
entry:
switch i32 %x, label %return [
i32 12, label %bb0
i32 4, label %bb1
i32 16, label %bb1
i32 20, label %bb2
i32 8, label %bb3
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 1) br label %return
bb3: tail call void @g(i32 2) br label %return
return: ret void
; Lowered as branches.
; CHECK-LABEL: dense_optsize
; CHECK: cmpl $11
; CHECK: cmpl $20
; CHECK: cmpl $16
; CHECK: cmpl $12
; CHECK: cmpl $4
; CHECK: cmpl $8
; CHECK: retq
}
; Switch over the five case values 4, 8, 12, 16 and 20 in a function carrying
; !prof !14 (function_entry_count 0) instead of a size attribute. The
; expectations at the end of the body look for a sequence of cmpl
; instructions (compare-and-branch lowering) ending in retq.
define void @dense_pgso(i32 %x) !prof !14 {
entry:
switch i32 %x, label %return [
i32 12, label %bb0
i32 4, label %bb1
i32 16, label %bb1
i32 20, label %bb2
i32 8, label %bb3
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 1) br label %return
bb3: tail call void @g(i32 2) br label %return
return: ret void
; Lowered as branches.
; CHECK-LABEL: dense_pgso
; CHECK: cmpl $11
; CHECK: cmpl $20
; CHECK: cmpl $16
; CHECK: cmpl $12
; CHECK: cmpl $4
; CHECK: cmpl $8
; CHECK: retq
}
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachment on @dense_pgso.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -473,6 +473,47 @@ return:
ret void
}
; Branches on %v == 0; each side then runs a single-case switch whose default
; block (bb7 / bb12) tail-calls @tail_call_me and returns. The function
; carries !prof !14 (function_entry_count 0). In the expected assembly both
; default paths end in a tail-call jump to tail_call_me.
define void @one_pgso(i32 %v) nounwind !prof !14 {
; CHECK-LABEL: one_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB6_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB6_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB6_4: # %return
; CHECK-NEXT: retq
entry:
%0 = icmp eq i32 %v, 0
br i1 %0, label %bbx, label %bby
bby:
switch i32 %v, label %bb7 [
i32 16, label %return
]
bb7:
tail call void @tail_call_me()
ret void
bbx:
switch i32 %v, label %bb12 [
i32 128, label %return
]
bb12:
tail call void @tail_call_me()
ret void
return:
ret void
}
; two - Same as one, but with two instructions in the common
; tail instead of one. This is too much to be merged, given
; the optsize attribute.
@ -484,49 +525,6 @@ define void @two() nounwind optsize {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB6_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB6_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
bby:
switch i32 undef, label %bb7 [
i32 16, label %return
]
bb7:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
bbx:
switch i32 undef, label %bb12 [
i32 128, label %return
]
bb12:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
return:
ret void
}
; two_minsize - Same as two, but with minsize instead of optsize.
define void @two_minsize() nounwind minsize {
; CHECK-LABEL: two_minsize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB7_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
@ -561,6 +559,90 @@ return:
ret void
}
; two_pgso - Both default blocks (bb7 and bb12) perform the same pair of
; volatile stores to @XYZ and then reach unreachable. The function carries
; !prof !14 (function_entry_count 0) instead of a size attribute. The expected
; assembly contains a single copy of the store pair, under %bb7.
define void @two_pgso() nounwind !prof !14 {
; CHECK-LABEL: two_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB8_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB8_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
bby:
switch i32 undef, label %bb7 [
i32 16, label %return
]
bb7:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
bbx:
switch i32 undef, label %bb12 [
i32 128, label %return
]
bb12:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
return:
ret void
}
; two_minsize - Same as two, but with minsize instead of optsize.
; Both default blocks (bb7 and bb12) perform the same pair of volatile stores
; to @XYZ and then reach unreachable; the expected assembly contains a single
; copy of the store pair, under %bb7.
define void @two_minsize() nounwind minsize {
; CHECK-LABEL: two_minsize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB9_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB9_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
bby:
switch i32 undef, label %bb7 [
i32 16, label %return
]
bb7:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
bbx:
switch i32 undef, label %bb12 [
i32 128, label %return
]
bb12:
store volatile i32 0, i32* @XYZ
store volatile i32 1, i32* @XYZ
unreachable
return:
ret void
}
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are enough to be tail-duplicated.
@ -568,20 +650,20 @@ define void @two_nosize(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: two_nosize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB8_3
; CHECK-NEXT: je .LBB10_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB8_4
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB8_3: # %bbx
; CHECK-NEXT: .LBB10_3: # %bbx
; CHECK-NEXT: cmpl $-1, %edx
; CHECK-NEXT: je .LBB8_4
; CHECK-NEXT: je .LBB10_4
; CHECK-NEXT: # %bb.5: # %bb12
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB8_4: # %return
; CHECK-NEXT: .LBB10_4: # %return
; CHECK-NEXT: retq
entry:
%0 = icmp eq i32 %x, 0
@ -621,11 +703,11 @@ define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cmovgq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: jle .LBB9_2
; CHECK-NEXT: jle .LBB11_2
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: imulq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .LBB9_2: # %for.end
; CHECK-NEXT: .LBB11_2: # %for.end
; CHECK-NEXT: retq
entry:
%cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
@ -654,24 +736,24 @@ define void @merge_aborts() {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: je .LBB12_5
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB10_5: # %abort1
; CHECK-NEXT: .LBB12_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
entry:
@ -714,27 +796,27 @@ define void @merge_alternating_aborts() {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_5
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_6
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_5
; CHECK-NEXT: je .LBB13_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_6
; CHECK-NEXT: je .LBB13_6
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB11_5: # %abort1
; CHECK-NEXT: .LBB13_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
; CHECK-NEXT: .LBB11_6: # %abort2
; CHECK-NEXT: .LBB13_6: # %abort2
; CHECK-NEXT: callq alt_abort
entry:
%c1 = call i1 @qux()
@ -763,3 +845,20 @@ abort4:
cont4:
ret void
}
; Module-level profile summary (InstrProf format), attached through the
; "ProfileSummary" module flag. !14 is a function_entry_count of 0 and is the
; node referenced by the `!prof !14` attachments on the *_pgso functions.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -49,6 +49,30 @@ no:
ret void
}
; Tests bit 11 of %x (mask 2048) and calls @bar when the bit is clear. The
; function carries !prof !14 (function_entry_count 0). The expected assembly
; performs the bit test with `btl $11, %edi` and skips the call via jb.
define void @test64_pgso(i64 inreg %x) !prof !14 {
; CHECK-LABEL: test64_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB2_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB2_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i64 %x, 2048
%s = icmp eq i64 %t, 0
br i1 %s, label %yes, label %no
yes:
call void @bar()
ret void
no:
ret void
}
; This test is identical to test64 above with only the destination of the br
; reversed. This somehow causes the two functions to get slightly different
; initial IR. One has an extra invert of the setcc. This previously caused one
@ -60,10 +84,10 @@ define void @test64_2(i64 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: je .LBB2_2
; CHECK-NEXT: je .LBB3_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB2_2: # %no
; CHECK-NEXT: .LBB3_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -84,10 +108,34 @@ define void @test64_optsize_2(i64 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB3_2
; CHECK-NEXT: jae .LBB4_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB3_2: # %no
; CHECK-NEXT: .LBB4_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i64 %x, 2048
%s = icmp eq i64 %t, 0
br i1 %s, label %no, label %yes
yes:
call void @bar()
ret void
no:
ret void
}
define void @test64_pgso_2(i64 inreg %x) !prof !14 {
; CHECK-LABEL: test64_pgso_2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB5_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB5_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -108,10 +156,10 @@ define void @test64_3(i64 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jb .LBB4_2
; CHECK-NEXT: jb .LBB6_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB4_2: # %no
; CHECK-NEXT: .LBB6_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -132,10 +180,34 @@ define void @test64_optsize_3(i64 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jb .LBB5_2
; CHECK-NEXT: jb .LBB7_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB5_2: # %no
; CHECK-NEXT: .LBB7_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i64 %x, 4294967296
%s = icmp eq i64 %t, 0
br i1 %s, label %yes, label %no
yes:
call void @bar()
ret void
no:
ret void
}
define void @test64_pgso_3(i64 inreg %x) !prof !14 {
; CHECK-LABEL: test64_pgso_3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jb .LBB8_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB8_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -156,10 +228,10 @@ define void @test64_4(i64 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jae .LBB6_2
; CHECK-NEXT: jae .LBB9_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB6_2: # %no
; CHECK-NEXT: .LBB9_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -180,10 +252,34 @@ define void @test64_optsize_4(i64 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jae .LBB7_2
; CHECK-NEXT: jae .LBB10_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB7_2: # %no
; CHECK-NEXT: .LBB10_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i64 %x, 4294967296
%s = icmp eq i64 %t, 0
br i1 %s, label %no, label %yes
yes:
call void @bar()
ret void
no:
ret void
}
define void @test64_pgso_4(i64 inreg %x) !prof !14 {
; CHECK-LABEL: test64_pgso_4:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btq $32, %rdi
; CHECK-NEXT: jae .LBB11_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB11_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -204,10 +300,10 @@ define void @test32(i32 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB8_2: # %no
; CHECK-NEXT: .LBB12_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -228,10 +324,10 @@ define void @test32_optsize(i32 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB9_2
; CHECK-NEXT: jb .LBB13_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB9_2: # %no
; CHECK-NEXT: .LBB13_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -252,10 +348,10 @@ define void @test32_2(i32 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: je .LBB10_2
; CHECK-NEXT: je .LBB14_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB10_2: # %no
; CHECK-NEXT: .LBB14_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -276,10 +372,34 @@ define void @test32_optsize_2(i32 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB11_2
; CHECK-NEXT: jae .LBB15_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB11_2: # %no
; CHECK-NEXT: .LBB15_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i32 %x, 2048
%s = icmp eq i32 %t, 0
br i1 %s, label %no, label %yes
yes:
call void @bar()
ret void
no:
ret void
}
define void @test32_pgso_2(i32 inreg %x) !prof !14 {
; CHECK-LABEL: test32_pgso_2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB16_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB16_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -300,10 +420,10 @@ define void @test16(i16 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: jne .LBB17_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB12_2: # %no
; CHECK-NEXT: .LBB17_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -324,10 +444,34 @@ define void @test16_optsize(i16 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB13_2
; CHECK-NEXT: jb .LBB18_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB13_2: # %no
; CHECK-NEXT: .LBB18_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i16 %x, 2048
%s = icmp eq i16 %t, 0
br i1 %s, label %yes, label %no
yes:
call void @bar()
ret void
no:
ret void
}
define void @test16_pgso(i16 inreg %x) !prof !14 {
; CHECK-LABEL: test16_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jb .LBB19_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB19_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -348,10 +492,10 @@ define void @test16_2(i16 inreg %x) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
; CHECK-NEXT: je .LBB14_2
; CHECK-NEXT: je .LBB20_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB14_2: # %no
; CHECK-NEXT: .LBB20_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -372,10 +516,34 @@ define void @test16_optsize_2(i16 inreg %x) optsize {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB15_2
; CHECK-NEXT: jae .LBB21_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB15_2: # %no
; CHECK-NEXT: .LBB21_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%t = and i16 %x, 2048
%s = icmp eq i16 %t, 0
br i1 %s, label %no, label %yes
yes:
call void @bar()
ret void
no:
ret void
}
define void @test16_pgso_2(i16 inreg %x) !prof !14 {
; CHECK-LABEL: test16_pgso_2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: btl $11, %edi
; CHECK-NEXT: jae .LBB22_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB22_2: # %no
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@ -512,3 +680,20 @@ define i32 @setcc_is_bit_set(i32 %x) {
}
; External callee invoked from the taken branch of the bit-test functions in
; this file.
declare void @bar()
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which the *_pgso functions in this file
; attach through !prof !14.
; NOTE(review) presumably this makes those functions count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -2002,6 +2002,56 @@ define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
ret <8 x i32> %b
}
; Keeps element 0 of %a and takes elements 1-3 from a zero vector, in a
; function annotated with !prof !14 (function_entry_count 0). The expected
; output is a single vmovq that zeroes the upper elements.
define <4 x double> @shuffle_v4f64_0zzz_pgso(<4 x double> %a) !prof !14 {
; ALL-LABEL: shuffle_v4f64_0zzz_pgso:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
%b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x double> %b
}
; Integer counterpart of the 4 x 64-bit case. Element 0 comes from %a and
; elements 1-3 from a zero vector, in a function annotated with !prof !14
; (function_entry_count 0). The expected output is a single vmovq.
define <4 x i64> @shuffle_v4i64_0zzz_pgso(<4 x i64> %a) !prof !14 {
; ALL-LABEL: shuffle_v4i64_0zzz_pgso:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
%b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i64> %b
}
; Keeps element 0 of %a and takes elements 1-7 from a zero vector, in a
; function annotated with !prof !14 (function_entry_count 0). Both check
; prefixes expect a vxorps to materialize zero. The AVX1OR2 prefix then
; expects vblendps, while the AVX512VL prefix expects vmovss.
define <8 x float> @shuffle_v8f32_0zzzzzzz_pgso(<8 x float> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT: retq
%b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x float> %b
}
; Integer counterpart of the 8 x 32-bit case. Element 0 comes from %a and
; elements 1-7 from a zero vector, in a function annotated with !prof !14
; (function_entry_count 0). Both check prefixes expect a vxorps to materialize
; zero. The AVX1OR2 prefix then expects vblendps, while the AVX512VL prefix
; expects vmovss.
define <8 x i32> @shuffle_v8i32_0zzzzzzz_pgso(<8 x i32> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT: retq
%b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i32> %b
}
define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
; ALL-LABEL: unpckh_v4i64:
; ALL: # %bb.0:
@ -2022,3 +2072,19 @@ define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
ret <4 x double> %unpckh
}
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which the *_pgso shuffle functions in
; this file attach through !prof !14.
; NOTE(review) presumably this makes those functions count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -240,3 +240,140 @@ define i64 @xor4_optsize(i64 %x) optsize {
%a = xor i64 %x, 9223372036854775808 ; toggle bit 63
ret i64 %a
}
; 64-bit AND with a mask that clears only bit 31, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btrq $31.
define i64 @and1_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: and1_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btrq $31, %rax
; CHECK-NEXT: retq
%a = and i64 %x, 18446744071562067967 ; clear bit 31
ret i64 %a
}
; 64-bit AND with a mask that clears only bit 32, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btrq $32.
define i64 @and2_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: and2_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btrq $32, %rax
; CHECK-NEXT: retq
%a = and i64 %x, 18446744069414584319 ; clear bit 32
ret i64 %a
}
; 64-bit AND with a mask that clears only bit 62, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btrq $62.
define i64 @and3_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: and3_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btrq $62, %rax
; CHECK-NEXT: retq
%a = and i64 %x, 13835058055282163711 ; clear bit 62
ret i64 %a
}
; 64-bit AND with a mask that clears only bit 63, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btrq $63.
define i64 @and4_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: and4_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btrq $63, %rax
; CHECK-NEXT: retq
%a = and i64 %x, 9223372036854775807 ; clear bit 63
ret i64 %a
}
; 64-bit OR with a constant that sets only bit 31, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btsq $31.
define i64 @or1_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: or1_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btsq $31, %rax
; CHECK-NEXT: retq
%a = or i64 %x, 2147483648 ; set bit 31
ret i64 %a
}
; 64-bit OR with a constant that sets only bit 32, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btsq $32.
define i64 @or2_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: or2_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btsq $32, %rax
; CHECK-NEXT: retq
%a = or i64 %x, 4294967296 ; set bit 32
ret i64 %a
}
; 64-bit OR with a constant that sets only bit 62, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btsq $62.
define i64 @or3_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: or3_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btsq $62, %rax
; CHECK-NEXT: retq
%a = or i64 %x, 4611686018427387904 ; set bit 62
ret i64 %a
}
; 64-bit OR with a constant that sets only bit 63, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btsq $63.
define i64 @or4_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: or4_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btsq $63, %rax
; CHECK-NEXT: retq
%a = or i64 %x, 9223372036854775808 ; set bit 63
ret i64 %a
}
; 64-bit XOR with a constant that toggles only bit 31, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btcq $31.
define i64 @xor1_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: xor1_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btcq $31, %rax
; CHECK-NEXT: retq
%a = xor i64 %x, 2147483648 ; toggle bit 31
ret i64 %a
}
; 64-bit XOR with a constant that toggles only bit 32, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btcq $32.
define i64 @xor2_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: xor2_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btcq $32, %rax
; CHECK-NEXT: retq
%a = xor i64 %x, 4294967296 ; toggle bit 32
ret i64 %a
}
; 64-bit XOR with a constant that toggles only bit 62, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btcq $62.
define i64 @xor3_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: xor3_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btcq $62, %rax
; CHECK-NEXT: retq
%a = xor i64 %x, 4611686018427387904 ; toggle bit 62
ret i64 %a
}
; 64-bit XOR with a constant that toggles only bit 63, in a function carrying
; !prof !14 (function_entry_count 0). The expected lowering is btcq $63.
define i64 @xor4_pgso(i64 %x) !prof !14 {
; CHECK-LABEL: xor4_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: btcq $63, %rax
; CHECK-NEXT: retq
%a = xor i64 %x, 9223372036854775808 ; toggle bit 63
ret i64 %a
}
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which the and/or/xor *_pgso functions in
; this file attach through !prof !14.
; NOTE(review) presumably this makes those functions count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -50,6 +50,19 @@ entry:
ret i64 %or
}
; Combines (%a << 11) with (%b >> 53) through an OR. The two shift amounts
; add up to 64, and the function carries !prof !14 (function_entry_count 0).
; The expected lowering is a single shldq $11.
define i64 @_Z8lshift11mm_pgso(i64 %a, i64 %b) !prof !14 {
; CHECK-LABEL: _Z8lshift11mm_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shldq $11, %rsi, %rax
; CHECK-NEXT: retq
entry:
%shl = shl i64 %a, 11
%shr = lshr i64 %b, 53
%or = or i64 %shr, %shl
ret i64 %or
}
attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; clang -O2 -c test2.cpp -emit-llvm -S
@ -78,3 +91,19 @@ entry:
attributes #2= { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which @_Z8lshift11mm_pgso attaches
; through !prof !14.
; NOTE(review) presumably this makes that function count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -25,6 +25,26 @@ while.end: ; preds = %while.body
ret void
}
; Test function carrying !prof !14 (function_entry_count 0). It copies 24
; bytes from %q to %p with llvm.memcpy, then loops. Each iteration decrements
; the stack slot %r and calls @g on the local %struct.T, and the loop exits
; once the decremented value is zero. The inalloca argument %unused is never
; read.
define void @f_pgso(i8* %p, i8* %q, i32* inalloca nocapture %unused) !prof !14 {
entry:
%g = alloca %struct.T, align 8
%r = alloca i32, align 8
store i32 0, i32* %r, align 4
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %p, i8* align 8 %q, i32 24, i1 false)
br label %while.body
while.body: ; preds = %while.body, %entry
%load = load i32, i32* %r, align 4
%dec = add nsw i32 %load, -1
store i32 %dec, i32* %r, align 4
call void @g(%struct.T* %g)
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body
while.end: ; preds = %while.body
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
@ -46,5 +66,38 @@ declare void @g(%struct.T*)
; CHECK: testb %[[NE_REG]], %[[NE_REG]]
; CHECK: jne
; CHECK-LABEL: _f_pgso:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: andl $-8, %esp
; CHECK-NOT: movl %esp, %esi
; CHECK: rep;movsl
; CHECK: leal 8(%esp), %esi
; CHECK: decl (%esp)
; CHECK: setne %[[NE_REG:.*]]
; CHECK: pushl %esi
; CHECK: calll _g
; CHECK: addl $4, %esp
; CHECK: testb %[[NE_REG]], %[[NE_REG]]
; CHECK: jne
attributes #0 = { nounwind optsize }
attributes #1 = { argmemonly nounwind }
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which @f_pgso attaches through
; !prof !14.
; NOTE(review) presumably this makes that function count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

View File

@ -152,6 +152,30 @@ rare.1:
br label %fallthrough
}
; Negative test - profile-guided size optimization. The function carries
; !prof !14 (function_entry_count 0) instead of an explicit size attribute.
; The checks expect the address computation to stay in the entry block and no
; i8-based GEP with offset 40 to be materialized in if.then.
define void @test6_pgso(i1 %cond, i64* %base) !prof !14 {
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
br i1 %cond, label %if.then, label %fallthrough
if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
br i1 %cmp, label %rare.1, label %fallthrough
fallthrough:
ret void
rare.1:
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
@ -278,3 +302,20 @@ BB:
store i1 false, i1* %G23
ret void
}
; Module flag carrying a ProfileSummary in InstrProf format (!0 through !13).
; !14 is a function_entry_count of 0, which @test6_pgso attaches through
; !prof !14.
; NOTE(review) presumably this makes that function count as cold for
; profile-guided size optimization - confirm against ProfileSummaryInfo.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}