mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[PGO][PGSO] Enable size optimizations in code gen / target passes for cold code.
Summary: Split off of D67120. Reviewers: davidxl Subscribers: hiraditya, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71288
This commit is contained in:
parent
c2d94cd107
commit
88791ed5ec
@ -29,7 +29,7 @@ cl::opt<bool> PGSOColdCodeOnly(
|
||||
"to cold code."));
|
||||
|
||||
cl::opt<bool> PGSOIRPassOrTestOnly(
|
||||
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(true),
|
||||
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
|
||||
cl::desc("Apply the profile guided size optimizations only"
|
||||
"to the IR passes or tests."));
|
||||
|
||||
|
128
test/CodeGen/AArch64/arm64-memset-to-bzero-pgso.ll
Normal file
128
test/CodeGen/AArch64/arm64-memset-to-bzero-pgso.ll
Normal file
@ -0,0 +1,128 @@
|
||||
; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \
|
||||
; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s
|
||||
; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
|
||||
|
||||
; CHECK-LABEL: fct1:
|
||||
; For small size (<= 256), we do not change memset to bzero.
|
||||
; CHECK-DARWIN: {{b|bl}} _memset
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct1(i8* nocapture %ptr) !prof !14 {
|
||||
entry:
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
|
||||
|
||||
; CHECK-LABEL: fct2:
|
||||
; When the size is bigger than 256, change into bzero.
|
||||
; CHECK-DARWIN: {{b|bl}} _bzero
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct2(i8* nocapture %ptr) !prof !14 {
|
||||
entry:
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fct3:
|
||||
; For unknown size, change to bzero.
|
||||
; CHECK-DARWIN: {{b|bl}} _bzero
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct3(i8* nocapture %ptr, i32 %unknown) !prof !14 {
|
||||
entry:
|
||||
%conv = sext i32 %unknown to i64
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fct4:
|
||||
; Size <= 256, no change.
|
||||
; CHECK-DARWIN: {{b|bl}} _memset
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct4(i8* %ptr) !prof !14 {
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i8* @__memset_chk(i8*, i32, i64, i64)
|
||||
|
||||
declare i64 @llvm.objectsize.i64(i8*, i1)
|
||||
|
||||
; CHECK-LABEL: fct5:
|
||||
; Size > 256, change.
|
||||
; CHECK-DARWIN: {{b|bl}} _bzero
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct5(i8* %ptr) !prof !14 {
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fct6:
|
||||
; Size = unknown, change.
|
||||
; CHECK-DARWIN: {{b|bl}} _bzero
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct6(i8* %ptr, i32 %unknown) !prof !14 {
|
||||
entry:
|
||||
%conv = sext i32 %unknown to i64
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Next functions check that memset is not turned into bzero
|
||||
; when the set constant is non-zero, whatever the given size.
|
||||
|
||||
; CHECK-LABEL: fct7:
|
||||
; memset with something that is not a zero, no change.
|
||||
; CHECK-DARWIN: {{b|bl}} _memset
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct7(i8* %ptr) !prof !14 {
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fct8:
|
||||
; memset with something that is not a zero, no change.
|
||||
; CHECK-DARWIN: {{b|bl}} _memset
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct8(i8* %ptr) !prof !14 {
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fct9:
|
||||
; memset with something that is not a zero, no change.
|
||||
; CHECK-DARWIN: {{b|bl}} _memset
|
||||
; CHECK-LINUX: {{b|bl}} memset
|
||||
define void @fct9(i8* %ptr, i32 %unknown) !prof !14 {
|
||||
entry:
|
||||
%conv = sext i32 %unknown to i64
|
||||
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
|
||||
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
@ -203,3 +203,136 @@ default: unreachable
|
||||
|
||||
return: ret void
|
||||
}
|
||||
|
||||
define i32 @jt1_optsize(i32 %a, i32 %b) optsize {
|
||||
entry:
|
||||
switch i32 %a, label %return [
|
||||
i32 1, label %bb1
|
||||
i32 2, label %bb2
|
||||
i32 3, label %bb3
|
||||
i32 4, label %bb4
|
||||
i32 5, label %bb5
|
||||
i32 6, label %bb6
|
||||
i32 7, label %bb7
|
||||
i32 8, label %bb8
|
||||
i32 9, label %bb9
|
||||
i32 10, label %bb10
|
||||
i32 11, label %bb11
|
||||
i32 12, label %bb12
|
||||
i32 13, label %bb13
|
||||
i32 14, label %bb14
|
||||
i32 15, label %bb15
|
||||
i32 16, label %bb16
|
||||
i32 17, label %bb17
|
||||
]
|
||||
; CHECK-LABEL: function jt1_optsize:
|
||||
; CHECK-NEXT: Jump Tables:
|
||||
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK0-NOT: %jump-table.1:
|
||||
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK4-NOT: %jump-table.1:
|
||||
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK8-NOT: %jump-table.1:
|
||||
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK16-NOT: %jump-table.1:
|
||||
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECKM1-NOT: %jump-table.1:
|
||||
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECKM3-NOT: %jump-table.1:
|
||||
; CHECK-DAG: End machine code for function jt1_optsize.
|
||||
|
||||
bb1: tail call void @ext(i32 1, i32 0) br label %return
|
||||
bb2: tail call void @ext(i32 2, i32 2) br label %return
|
||||
bb3: tail call void @ext(i32 3, i32 4) br label %return
|
||||
bb4: tail call void @ext(i32 4, i32 6) br label %return
|
||||
bb5: tail call void @ext(i32 5, i32 8) br label %return
|
||||
bb6: tail call void @ext(i32 6, i32 10) br label %return
|
||||
bb7: tail call void @ext(i32 7, i32 12) br label %return
|
||||
bb8: tail call void @ext(i32 8, i32 14) br label %return
|
||||
bb9: tail call void @ext(i32 9, i32 16) br label %return
|
||||
bb10: tail call void @ext(i32 1, i32 18) br label %return
|
||||
bb11: tail call void @ext(i32 2, i32 20) br label %return
|
||||
bb12: tail call void @ext(i32 3, i32 22) br label %return
|
||||
bb13: tail call void @ext(i32 4, i32 24) br label %return
|
||||
bb14: tail call void @ext(i32 5, i32 26) br label %return
|
||||
bb15: tail call void @ext(i32 6, i32 28) br label %return
|
||||
bb16: tail call void @ext(i32 7, i32 30) br label %return
|
||||
bb17: tail call void @ext(i32 8, i32 32) br label %return
|
||||
|
||||
return: ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @jt1_pgso(i32 %a, i32 %b) !prof !14 {
|
||||
entry:
|
||||
switch i32 %a, label %return [
|
||||
i32 1, label %bb1
|
||||
i32 2, label %bb2
|
||||
i32 3, label %bb3
|
||||
i32 4, label %bb4
|
||||
i32 5, label %bb5
|
||||
i32 6, label %bb6
|
||||
i32 7, label %bb7
|
||||
i32 8, label %bb8
|
||||
i32 9, label %bb9
|
||||
i32 10, label %bb10
|
||||
i32 11, label %bb11
|
||||
i32 12, label %bb12
|
||||
i32 13, label %bb13
|
||||
i32 14, label %bb14
|
||||
i32 15, label %bb15
|
||||
i32 16, label %bb16
|
||||
i32 17, label %bb17
|
||||
]
|
||||
; CHECK-LABEL: function jt1_pgso:
|
||||
; CHECK-NEXT: Jump Tables:
|
||||
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK0-NOT: %jump-table.1:
|
||||
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK4-NOT: %jump-table.1:
|
||||
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK8-NOT: %jump-table.1:
|
||||
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECK16-NOT: %jump-table.1:
|
||||
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECKM1-NOT: %jump-table.1:
|
||||
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
|
||||
; CHECKM3-NOT: %jump-table.1:
|
||||
; CHECK-DAG: End machine code for function jt1_pgso.
|
||||
|
||||
bb1: tail call void @ext(i32 1, i32 0) br label %return
|
||||
bb2: tail call void @ext(i32 2, i32 2) br label %return
|
||||
bb3: tail call void @ext(i32 3, i32 4) br label %return
|
||||
bb4: tail call void @ext(i32 4, i32 6) br label %return
|
||||
bb5: tail call void @ext(i32 5, i32 8) br label %return
|
||||
bb6: tail call void @ext(i32 6, i32 10) br label %return
|
||||
bb7: tail call void @ext(i32 7, i32 12) br label %return
|
||||
bb8: tail call void @ext(i32 8, i32 14) br label %return
|
||||
bb9: tail call void @ext(i32 9, i32 16) br label %return
|
||||
bb10: tail call void @ext(i32 1, i32 18) br label %return
|
||||
bb11: tail call void @ext(i32 2, i32 20) br label %return
|
||||
bb12: tail call void @ext(i32 3, i32 22) br label %return
|
||||
bb13: tail call void @ext(i32 4, i32 24) br label %return
|
||||
bb14: tail call void @ext(i32 5, i32 26) br label %return
|
||||
bb15: tail call void @ext(i32 6, i32 28) br label %return
|
||||
bb16: tail call void @ext(i32 7, i32 30) br label %return
|
||||
bb17: tail call void @ext(i32 8, i32 32) br label %return
|
||||
|
||||
return: ret i32 %b
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -17,3 +17,28 @@ define void @f_optsize(<4 x i32>* %p) optsize {
|
||||
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: f_pgso:
|
||||
; CHECK: vld1.64 {{.*}}, [r1]
|
||||
; CHECK: .p2align 3
|
||||
define void @f_pgso(<4 x i32>* %p) !prof !14 {
|
||||
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -23,6 +23,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Perform tail call optimization for external symbol.
|
||||
@dest_pgso = global [2 x i8] zeroinitializer
|
||||
define void @caller_extern_pgso(i8* %src) !prof !14 {
|
||||
entry:
|
||||
; CHECK: caller_extern_pgso
|
||||
; CHECK-NOT: call memcpy
|
||||
; CHECK: tail memcpy
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest_pgso, i32 0, i32 0), i8* %src, i32 7, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Perform indirect tail call optimization (for function pointer call).
|
||||
declare void @callee_indirect1()
|
||||
declare void @callee_indirect2()
|
||||
@ -146,3 +157,20 @@ entry:
|
||||
tail call void @callee_nostruct()
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -29,6 +29,13 @@ define i32 @test_minsize(i32 %a) nounwind minsize {
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @test_pgso(i32 %a) nounwind !prof !14 {
|
||||
; CHECK: test_pgso
|
||||
; CHECK: movl
|
||||
; CHECK-NEXT: ret
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @test_add(i32 %a, i32 %b) nounwind {
|
||||
; CHECK: test_add
|
||||
; CHECK: addl
|
||||
@ -101,3 +108,19 @@ while.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -190,6 +190,16 @@ define float @floor_f32_load(float* %aptr) optsize {
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @floor_f32_load_pgso(float* %aptr) !prof !14 {
|
||||
; CHECK-LABEL: floor_f32_load_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = load float, float* %aptr
|
||||
%res = call float @llvm.floor.f32(float %a)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define double @nearbyint_f64_load(double* %aptr) optsize {
|
||||
; CHECK-LABEL: nearbyint_f64_load:
|
||||
; CHECK: # %bb.0:
|
||||
@ -200,3 +210,29 @@ define double @nearbyint_f64_load(double* %aptr) optsize {
|
||||
ret double %res
|
||||
}
|
||||
|
||||
define double @nearbyint_f64_load_pgso(double* %aptr) !prof !14 {
|
||||
; CHECK-LABEL: nearbyint_f64_load_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = load double, double* %aptr
|
||||
%res = call double @llvm.nearbyint.f64(double %a)
|
||||
ret double %res
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -1970,6 +1970,47 @@ define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
|
||||
ret <32 x i16> %ret
|
||||
}
|
||||
|
||||
define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
|
||||
; KNL-LABEL: test_build_vec_v32i1_pgso:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
|
||||
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
||||
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_build_vec_v32i1_pgso:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_build_vec_v32i1_pgso:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_build_vec_v32i1_pgso:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm1
|
||||
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_build_vec_v32i1_pgso:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
%ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %ret
|
||||
}
|
||||
|
||||
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
|
||||
; KNL-LABEL: test_build_vec_v64i1:
|
||||
; KNL: ## %bb.0:
|
||||
@ -2013,12 +2054,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
||||
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB43_2
|
||||
; KNL-NEXT: je LBB44_2
|
||||
; KNL-NEXT: ## %bb.1: ## %L1
|
||||
; KNL-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB43_2: ## %L2
|
||||
; KNL-NEXT: LBB44_2: ## %L2
|
||||
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
@ -2029,12 +2070,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
||||
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
||||
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: ktestb %k0, %k1
|
||||
; SKX-NEXT: je LBB43_2
|
||||
; SKX-NEXT: je LBB44_2
|
||||
; SKX-NEXT: ## %bb.1: ## %L1
|
||||
; SKX-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB43_2: ## %L2
|
||||
; SKX-NEXT: LBB44_2: ## %L2
|
||||
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
@ -2046,12 +2087,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
||||
; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB43_2
|
||||
; AVX512BW-NEXT: je LBB44_2
|
||||
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB43_2: ## %L2
|
||||
; AVX512BW-NEXT: LBB44_2: ## %L2
|
||||
; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -2062,12 +2103,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
||||
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
||||
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: ktestb %k0, %k1
|
||||
; AVX512DQ-NEXT: je LBB43_2
|
||||
; AVX512DQ-NEXT: je LBB44_2
|
||||
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB43_2: ## %L2
|
||||
; AVX512DQ-NEXT: LBB44_2: ## %L2
|
||||
; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -2079,12 +2120,12 @@ define void @ktest_1(<8 x double> %in, double * %base) {
|
||||
; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
|
||||
; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0
|
||||
; X86-NEXT: ktestb %k0, %k1
|
||||
; X86-NEXT: je LBB43_2
|
||||
; X86-NEXT: je LBB44_2
|
||||
; X86-NEXT: ## %bb.1: ## %L1
|
||||
; X86-NEXT: vmovapd %zmm0, (%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB43_2: ## %L2
|
||||
; X86-NEXT: LBB44_2: ## %L2
|
||||
; X86-NEXT: vmovapd %zmm0, 8(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
@ -2131,13 +2172,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: je LBB44_2
|
||||
; KNL-NEXT: je LBB45_2
|
||||
; KNL-NEXT: ## %bb.1: ## %L1
|
||||
; KNL-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB44_2: ## %L2
|
||||
; KNL-NEXT: LBB45_2: ## %L2
|
||||
; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -2154,13 +2195,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; SKX-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; SKX-NEXT: kortestd %k1, %k0
|
||||
; SKX-NEXT: je LBB44_2
|
||||
; SKX-NEXT: je LBB45_2
|
||||
; SKX-NEXT: ## %bb.1: ## %L1
|
||||
; SKX-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB44_2: ## %L2
|
||||
; SKX-NEXT: LBB45_2: ## %L2
|
||||
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
@ -2177,13 +2218,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; AVX512BW-NEXT: kortestd %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB44_2
|
||||
; AVX512BW-NEXT: je LBB45_2
|
||||
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB44_2: ## %L2
|
||||
; AVX512BW-NEXT: LBB45_2: ## %L2
|
||||
; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
@ -2203,13 +2244,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: je LBB44_2
|
||||
; AVX512DQ-NEXT: je LBB45_2
|
||||
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
||||
; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB44_2: ## %L2
|
||||
; AVX512DQ-NEXT: LBB45_2: ## %L2
|
||||
; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
|
||||
; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -2227,13 +2268,13 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||
; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
||||
; X86-NEXT: kunpckwd %k1, %k2, %k1
|
||||
; X86-NEXT: kortestd %k1, %k0
|
||||
; X86-NEXT: je LBB44_2
|
||||
; X86-NEXT: je LBB45_2
|
||||
; X86-NEXT: ## %bb.1: ## %L1
|
||||
; X86-NEXT: vmovaps %zmm0, (%eax)
|
||||
; X86-NEXT: vmovaps %zmm1, 64(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB44_2: ## %L2
|
||||
; X86-NEXT: LBB45_2: ## %L2
|
||||
; X86-NEXT: vmovaps %zmm0, 4(%eax)
|
||||
; X86-NEXT: vmovaps %zmm1, 68(%eax)
|
||||
; X86-NEXT: vzeroupper
|
||||
@ -4188,12 +4229,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testw %ax, %ax
|
||||
; KNL-NEXT: jle LBB65_1
|
||||
; KNL-NEXT: jle LBB66_1
|
||||
; KNL-NEXT: ## %bb.2: ## %bb.2
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB65_1: ## %bb.1
|
||||
; KNL-NEXT: LBB66_1: ## %bb.1
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -4207,12 +4248,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testw %ax, %ax
|
||||
; SKX-NEXT: jle LBB65_1
|
||||
; SKX-NEXT: jle LBB66_1
|
||||
; SKX-NEXT: ## %bb.2: ## %bb.2
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB65_1: ## %bb.1
|
||||
; SKX-NEXT: LBB66_1: ## %bb.1
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -4226,12 +4267,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testw %ax, %ax
|
||||
; AVX512BW-NEXT: jle LBB65_1
|
||||
; AVX512BW-NEXT: jle LBB66_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB65_1: ## %bb.1
|
||||
; AVX512BW-NEXT: LBB66_1: ## %bb.1
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -4245,12 +4286,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: testw %ax, %ax
|
||||
; AVX512DQ-NEXT: jle LBB65_1
|
||||
; AVX512DQ-NEXT: jle LBB66_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB65_1: ## %bb.1
|
||||
; AVX512DQ-NEXT: LBB66_1: ## %bb.1
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -4264,12 +4305,12 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; X86-NEXT: kmovd %k0, %eax
|
||||
; X86-NEXT: testw %ax, %ax
|
||||
; X86-NEXT: jle LBB65_1
|
||||
; X86-NEXT: jle LBB66_1
|
||||
; X86-NEXT: ## %bb.2: ## %bb.2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB65_1: ## %bb.1
|
||||
; X86-NEXT: LBB66_1: ## %bb.1
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -4297,11 +4338,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
|
||||
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; CHECK-NEXT: kortestw %k0, %k0
|
||||
; CHECK-NEXT: jb LBB66_2
|
||||
; CHECK-NEXT: jb LBB67_2
|
||||
; CHECK-NEXT: ## %bb.1: ## %bb.1
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: callq _foo
|
||||
; CHECK-NEXT: LBB66_2: ## %bb.2
|
||||
; CHECK-NEXT: LBB67_2: ## %bb.2
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
@ -4313,11 +4354,11 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
|
||||
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; X86-NEXT: kortestw %k0, %k0
|
||||
; X86-NEXT: jb LBB66_2
|
||||
; X86-NEXT: jb LBB67_2
|
||||
; X86-NEXT: ## %bb.1: ## %bb.1
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: LBB66_2: ## %bb.2
|
||||
; X86-NEXT: LBB67_2: ## %bb.2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
@ -4505,12 +4546,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB72_1
|
||||
; KNL-NEXT: je LBB73_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB72_1: ## %bar
|
||||
; KNL-NEXT: LBB73_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -4527,12 +4568,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
|
||||
; SKX-NEXT: korb %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestb %k1, %k0
|
||||
; SKX-NEXT: je LBB72_1
|
||||
; SKX-NEXT: je LBB73_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB72_1: ## %bar
|
||||
; SKX-NEXT: LBB73_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -4555,12 +4596,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB72_1
|
||||
; AVX512BW-NEXT: je LBB73_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB72_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB73_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -4581,12 +4622,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; AVX512DQ-NEXT: korb %k1, %k0, %k0
|
||||
; AVX512DQ-NEXT: korb %k3, %k2, %k1
|
||||
; AVX512DQ-NEXT: ktestb %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB72_1
|
||||
; AVX512DQ-NEXT: je LBB73_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB72_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB73_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -4603,12 +4644,12 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
|
||||
; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
|
||||
; X86-NEXT: korb %k2, %k1, %k1
|
||||
; X86-NEXT: ktestb %k1, %k0
|
||||
; X86-NEXT: je LBB72_1
|
||||
; X86-NEXT: je LBB73_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB72_1: ## %bar
|
||||
; X86-NEXT: LBB73_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -4646,12 +4687,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
||||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: je LBB73_1
|
||||
; KNL-NEXT: je LBB74_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB73_1: ## %bar
|
||||
; KNL-NEXT: LBB74_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -4668,12 +4709,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
||||
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korb %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestb %k1, %k0
|
||||
; SKX-NEXT: je LBB73_1
|
||||
; SKX-NEXT: je LBB74_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB73_1: ## %bar
|
||||
; SKX-NEXT: LBB74_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -4692,12 +4733,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
||||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testb %al, %al
|
||||
; AVX512BW-NEXT: je LBB73_1
|
||||
; AVX512BW-NEXT: je LBB74_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB73_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB74_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -4714,12 +4755,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
||||
; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; AVX512DQ-NEXT: korb %k2, %k1, %k1
|
||||
; AVX512DQ-NEXT: ktestb %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB73_1
|
||||
; AVX512DQ-NEXT: je LBB74_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB73_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB74_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -4736,12 +4777,12 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
|
||||
; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: korb %k2, %k1, %k1
|
||||
; X86-NEXT: ktestb %k1, %k0
|
||||
; X86-NEXT: je LBB73_1
|
||||
; X86-NEXT: je LBB74_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB73_1: ## %bar
|
||||
; X86-NEXT: LBB74_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -4778,12 +4819,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
||||
; KNL-NEXT: korw %k2, %k1, %k1
|
||||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: je LBB74_1
|
||||
; KNL-NEXT: je LBB75_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB74_1: ## %bar
|
||||
; KNL-NEXT: LBB75_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -4800,12 +4841,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
||||
; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korw %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestw %k1, %k0
|
||||
; SKX-NEXT: je LBB74_1
|
||||
; SKX-NEXT: je LBB75_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB74_1: ## %bar
|
||||
; SKX-NEXT: LBB75_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -4823,12 +4864,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
||||
; AVX512BW-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512BW-NEXT: kortestw %k0, %k0
|
||||
; AVX512BW-NEXT: je LBB74_1
|
||||
; AVX512BW-NEXT: je LBB75_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB74_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB75_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -4845,12 +4886,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
||||
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; AVX512DQ-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512DQ-NEXT: ktestw %k1, %k0
|
||||
; AVX512DQ-NEXT: je LBB74_1
|
||||
; AVX512DQ-NEXT: je LBB75_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB74_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB75_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -4867,12 +4908,12 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
|
||||
; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: korw %k2, %k1, %k1
|
||||
; X86-NEXT: ktestw %k1, %k0
|
||||
; X86-NEXT: je LBB74_1
|
||||
; X86-NEXT: je LBB75_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB74_1: ## %bar
|
||||
; X86-NEXT: LBB75_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -4928,12 +4969,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: orl %eax, %ecx
|
||||
; KNL-NEXT: je LBB75_1
|
||||
; KNL-NEXT: je LBB76_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB75_1: ## %bar
|
||||
; KNL-NEXT: LBB76_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -4950,12 +4991,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: kord %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestd %k1, %k0
|
||||
; SKX-NEXT: je LBB75_1
|
||||
; SKX-NEXT: je LBB76_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB75_1: ## %bar
|
||||
; SKX-NEXT: LBB76_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -4972,12 +5013,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; AVX512BW-NEXT: kord %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: ktestd %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB75_1
|
||||
; AVX512BW-NEXT: je LBB76_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB75_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB76_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -5014,12 +5055,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
||||
; AVX512DQ-NEXT: shll $16, %ecx
|
||||
; AVX512DQ-NEXT: orl %eax, %ecx
|
||||
; AVX512DQ-NEXT: je LBB75_1
|
||||
; AVX512DQ-NEXT: je LBB76_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB75_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB76_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -5036,12 +5077,12 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
|
||||
; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
|
||||
; X86-NEXT: kord %k2, %k1, %k1
|
||||
; X86-NEXT: ktestd %k1, %k0
|
||||
; X86-NEXT: je LBB75_1
|
||||
; X86-NEXT: je LBB76_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB75_1: ## %bar
|
||||
; X86-NEXT: LBB76_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -5121,12 +5162,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; KNL-NEXT: orl %eax, %edx
|
||||
; KNL-NEXT: shlq $32, %rdx
|
||||
; KNL-NEXT: orq %rcx, %rdx
|
||||
; KNL-NEXT: je LBB76_1
|
||||
; KNL-NEXT: je LBB77_1
|
||||
; KNL-NEXT: ## %bb.2: ## %exit
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB76_1: ## %bar
|
||||
; KNL-NEXT: LBB77_1: ## %bar
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
@ -5143,12 +5184,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
|
||||
; SKX-NEXT: korq %k2, %k1, %k1
|
||||
; SKX-NEXT: ktestq %k1, %k0
|
||||
; SKX-NEXT: je LBB76_1
|
||||
; SKX-NEXT: je LBB77_1
|
||||
; SKX-NEXT: ## %bb.2: ## %exit
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB76_1: ## %bar
|
||||
; SKX-NEXT: LBB77_1: ## %bar
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
@ -5165,12 +5206,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
|
||||
; AVX512BW-NEXT: korq %k2, %k1, %k1
|
||||
; AVX512BW-NEXT: ktestq %k1, %k0
|
||||
; AVX512BW-NEXT: je LBB76_1
|
||||
; AVX512BW-NEXT: je LBB77_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB76_1: ## %bar
|
||||
; AVX512BW-NEXT: LBB77_1: ## %bar
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
@ -5231,12 +5272,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; AVX512DQ-NEXT: orl %eax, %edx
|
||||
; AVX512DQ-NEXT: shlq $32, %rdx
|
||||
; AVX512DQ-NEXT: orq %rcx, %rdx
|
||||
; AVX512DQ-NEXT: je LBB76_1
|
||||
; AVX512DQ-NEXT: je LBB77_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %exit
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB76_1: ## %bar
|
||||
; AVX512DQ-NEXT: LBB77_1: ## %bar
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
@ -5255,12 +5296,12 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
|
||||
; X86-NEXT: kandq %k1, %k0, %k0
|
||||
; X86-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X86-NEXT: kortestd %k1, %k0
|
||||
; X86-NEXT: je LBB76_1
|
||||
; X86-NEXT: je LBB77_1
|
||||
; X86-NEXT: ## %bb.2: ## %exit
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: LBB76_1: ## %bar
|
||||
; X86-NEXT: LBB77_1: ## %bar
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: calll _foo
|
||||
; X86-NEXT: addl $12, %esp
|
||||
@ -5360,3 +5401,20 @@ define <64 x i1> @mask64_insert(i32 %a) {
|
||||
%maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
|
||||
ret <64 x i1> %maskv
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -130,6 +130,24 @@ define i64 @div64_optsize(i64 %a, i64 %b) optsize {
|
||||
ret i64 %div
|
||||
}
|
||||
|
||||
define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
|
||||
; CHECK-LABEL: div64_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: cqto
|
||||
; CHECK-NEXT: idivq %rsi
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; HUGEWS-LABEL: div64_pgso:
|
||||
; HUGEWS: # %bb.0:
|
||||
; HUGEWS-NEXT: movq %rdi, %rax
|
||||
; HUGEWS-NEXT: cqto
|
||||
; HUGEWS-NEXT: idivq %rsi
|
||||
; HUGEWS-NEXT: retq
|
||||
%div = sdiv i64 %a, %b
|
||||
ret i64 %div
|
||||
}
|
||||
|
||||
define i64 @div64_hugews(i64 %a, i64 %b) {
|
||||
; ATOM-LABEL: div64_hugews:
|
||||
; ATOM: # %bb.0:
|
||||
@ -137,12 +155,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
|
||||
; ATOM-NEXT: movq %rdi, %rax
|
||||
; ATOM-NEXT: orq %rsi, %rcx
|
||||
; ATOM-NEXT: shrq $32, %rcx
|
||||
; ATOM-NEXT: je .LBB3_1
|
||||
; ATOM-NEXT: je .LBB4_1
|
||||
; ATOM-NEXT: # %bb.2:
|
||||
; ATOM-NEXT: cqto
|
||||
; ATOM-NEXT: idivq %rsi
|
||||
; ATOM-NEXT: retq
|
||||
; ATOM-NEXT: .LBB3_1:
|
||||
; ATOM-NEXT: .LBB4_1:
|
||||
; ATOM-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; ATOM-NEXT: xorl %edx, %edx
|
||||
; ATOM-NEXT: divl %esi
|
||||
@ -155,12 +173,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
|
||||
; SLM-NEXT: movq %rdi, %rax
|
||||
; SLM-NEXT: orq %rsi, %rcx
|
||||
; SLM-NEXT: shrq $32, %rcx
|
||||
; SLM-NEXT: je .LBB3_1
|
||||
; SLM-NEXT: je .LBB4_1
|
||||
; SLM-NEXT: # %bb.2:
|
||||
; SLM-NEXT: cqto
|
||||
; SLM-NEXT: idivq %rsi
|
||||
; SLM-NEXT: retq
|
||||
; SLM-NEXT: .LBB3_1:
|
||||
; SLM-NEXT: .LBB4_1:
|
||||
; SLM-NEXT: xorl %edx, %edx
|
||||
; SLM-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; SLM-NEXT: divl %esi
|
||||
@ -173,12 +191,12 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
|
||||
; SKL-NEXT: movq %rdi, %rcx
|
||||
; SKL-NEXT: orq %rsi, %rcx
|
||||
; SKL-NEXT: shrq $32, %rcx
|
||||
; SKL-NEXT: je .LBB3_1
|
||||
; SKL-NEXT: je .LBB4_1
|
||||
; SKL-NEXT: # %bb.2:
|
||||
; SKL-NEXT: cqto
|
||||
; SKL-NEXT: idivq %rsi
|
||||
; SKL-NEXT: retq
|
||||
; SKL-NEXT: .LBB3_1:
|
||||
; SKL-NEXT: .LBB4_1:
|
||||
; SKL-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; SKL-NEXT: xorl %edx, %edx
|
||||
; SKL-NEXT: divl %esi
|
||||
@ -213,6 +231,24 @@ define i32 @div32_optsize(i32 %a, i32 %b) optsize {
|
||||
ret i32 %div
|
||||
}
|
||||
|
||||
define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
|
||||
; CHECK-LABEL: div32_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: cltd
|
||||
; CHECK-NEXT: idivl %esi
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; HUGEWS-LABEL: div32_pgso:
|
||||
; HUGEWS: # %bb.0:
|
||||
; HUGEWS-NEXT: movl %edi, %eax
|
||||
; HUGEWS-NEXT: cltd
|
||||
; HUGEWS-NEXT: idivl %esi
|
||||
; HUGEWS-NEXT: retq
|
||||
%div = sdiv i32 %a, %b
|
||||
ret i32 %div
|
||||
}
|
||||
|
||||
define i32 @div32_minsize(i32 %a, i32 %b) minsize {
|
||||
; CHECK-LABEL: div32_minsize:
|
||||
; CHECK: # %bb.0:
|
||||
@ -246,3 +282,4 @@ define i32 @div32_minsize(i32 %a, i32 %b) minsize {
|
||||
!12 = !{i32 10000, i64 1000, i32 1}
|
||||
!13 = !{i32 999000, i64 1000, i32 3}
|
||||
!14 = !{i32 999999, i64 5, i32 3}
|
||||
!15 = !{!"function_entry_count", i64 0}
|
||||
|
@ -88,7 +88,7 @@ define i32 @weighted_select1(i32 %a, i32 %b) {
|
||||
; CHECK-NEXT: cmovnel %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
@ -104,7 +104,7 @@ define i32 @weighted_select2(i32 %a, i32 %b) {
|
||||
; CHECK-NEXT: .LBB6_2: # %select.end
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !1
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
@ -124,7 +124,7 @@ define i32 @weighted_select3(i32 %a, i32 %b) {
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
@ -137,12 +137,51 @@ define i32 @unweighted_select(i32 %a, i32 %b) {
|
||||
; CHECK-NEXT: cmovnel %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !3
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !18
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
!0 = !{!"branch_weights", i32 1, i32 99}
|
||||
!1 = !{!"branch_weights", i32 1, i32 100}
|
||||
!2 = !{!"branch_weights", i32 100, i32 1}
|
||||
!3 = !{!"branch_weights", i32 0, i32 0}
|
||||
define i32 @weighted_select_optsize(i32 %a, i32 %b) optsize {
|
||||
; CHECK-LABEL: weighted_select_optsize:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: cmovnel %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
define i32 @weighted_select_pgso(i32 %a, i32 %b) !prof !14 {
|
||||
; CHECK-LABEL: weighted_select_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: cmovnel %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = icmp ne i32 %a, 0
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
!15 = !{!"branch_weights", i32 1, i32 99}
|
||||
!16 = !{!"branch_weights", i32 1, i32 100}
|
||||
!17 = !{!"branch_weights", i32 100, i32 1}
|
||||
!18 = !{!"branch_weights", i32 0, i32 0}
|
||||
|
242
test/CodeGen/X86/conditional-tailcall-pgso.ll
Normal file
242
test/CodeGen/X86/conditional-tailcall-pgso.ll
Normal file
@ -0,0 +1,242 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
|
||||
; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
|
||||
; RUN: llc < %s -mtriple=x86_64-win32 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=WIN64
|
||||
|
||||
declare void @foo()
|
||||
declare void @bar()
|
||||
|
||||
define void @f(i32 %x, i32 %y) !prof !14 {
|
||||
; CHECK32-LABEL: f:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x08]
|
||||
; CHECK32-NEXT: jne bar # TAILCALL
|
||||
; CHECK32-NEXT: # encoding: [0x75,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: # %bb.1: # %bb1
|
||||
; CHECK32-NEXT: jmp foo # TAILCALL
|
||||
; CHECK32-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
;
|
||||
; CHECK64-LABEL: f:
|
||||
; CHECK64: # %bb.0: # %entry
|
||||
; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
|
||||
; CHECK64-NEXT: jne bar # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0x75,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.1: # %bb1
|
||||
; CHECK64-NEXT: jmp foo # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
;
|
||||
; WIN64-LABEL: f:
|
||||
; WIN64: # %bb.0: # %entry
|
||||
; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
|
||||
; WIN64-NEXT: jne bar # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0x75,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.1: # %bb1
|
||||
; WIN64-NEXT: jmp foo # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
entry:
|
||||
%p = icmp eq i32 %x, %y
|
||||
br i1 %p, label %bb1, label %bb2
|
||||
bb1:
|
||||
tail call void @foo()
|
||||
ret void
|
||||
bb2:
|
||||
tail call void @bar()
|
||||
ret void
|
||||
|
||||
; Check that the asm doesn't just look good, but uses the correct encoding.
|
||||
}
|
||||
|
||||
define void @f_non_leaf(i32 %x, i32 %y) !prof !14 {
|
||||
; CHECK32-LABEL: f_non_leaf:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
|
||||
; CHECK32-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK32-NEXT: .cfi_offset %ebx, -8
|
||||
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
|
||||
; CHECK32-NEXT: #APP
|
||||
; CHECK32-NEXT: #NO_APP
|
||||
; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c]
|
||||
; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: # %bb.1: # %bb1
|
||||
; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
|
||||
; CHECK32-NEXT: .cfi_def_cfa_offset 4
|
||||
; CHECK32-NEXT: jmp foo # TAILCALL
|
||||
; CHECK32-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: .LBB1_2: # %bb2
|
||||
; CHECK32-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
|
||||
; CHECK32-NEXT: .cfi_def_cfa_offset 4
|
||||
; CHECK32-NEXT: jmp bar # TAILCALL
|
||||
; CHECK32-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
;
|
||||
; CHECK64-LABEL: f_non_leaf:
|
||||
; CHECK64: # %bb.0: # %entry
|
||||
; CHECK64-NEXT: pushq %rbx # encoding: [0x53]
|
||||
; CHECK64-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK64-NEXT: .cfi_offset %rbx, -16
|
||||
; CHECK64-NEXT: #APP
|
||||
; CHECK64-NEXT: #NO_APP
|
||||
; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
|
||||
; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.1: # %bb1
|
||||
; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
|
||||
; CHECK64-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK64-NEXT: jmp foo # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB1_2: # %bb2
|
||||
; CHECK64-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
|
||||
; CHECK64-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK64-NEXT: jmp bar # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
;
|
||||
; WIN64-LABEL: f_non_leaf:
|
||||
; WIN64: # %bb.0: # %entry
|
||||
; WIN64-NEXT: pushq %rbx # encoding: [0x53]
|
||||
; WIN64-NEXT: .seh_pushreg %rbx
|
||||
; WIN64-NEXT: .seh_endprologue
|
||||
; WIN64-NEXT: #APP
|
||||
; WIN64-NEXT: #NO_APP
|
||||
; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
|
||||
; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.1: # %bb1
|
||||
; WIN64-NEXT: popq %rbx # encoding: [0x5b]
|
||||
; WIN64-NEXT: jmp foo # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB1_2: # %bb2
|
||||
; WIN64-NEXT: nop # encoding: [0x90]
|
||||
; WIN64-NEXT: popq %rbx # encoding: [0x5b]
|
||||
; WIN64-NEXT: jmp bar # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .seh_handlerdata
|
||||
; WIN64-NEXT: .text
|
||||
; WIN64-NEXT: .seh_endproc
|
||||
entry:
|
||||
; Force %ebx to be spilled on the stack, turning this into
|
||||
; not a "leaf" function for Win64.
|
||||
tail call void asm sideeffect "", "~{ebx}"()
|
||||
|
||||
%p = icmp eq i32 %x, %y
|
||||
br i1 %p, label %bb1, label %bb2
|
||||
bb1:
|
||||
tail call void @foo()
|
||||
ret void
|
||||
bb2:
|
||||
tail call void @bar()
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
declare x86_thiscallcc zeroext i1 @baz(i8*, i32)
|
||||
define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) !prof !14 {
|
||||
; CHECK32-LABEL: BlockPlacementTest:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
|
||||
; CHECK32-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
|
||||
; CHECK32-NEXT: je .LBB2_3 # encoding: [0x74,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: # %bb.1: # %land.rhs
|
||||
; CHECK32-NEXT: movb $1, %al # encoding: [0xb0,0x01]
|
||||
; CHECK32-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
|
||||
; CHECK32-NEXT: je baz # TAILCALL
|
||||
; CHECK32-NEXT: # encoding: [0x74,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: .LBB2_2: # %land.end
|
||||
; CHECK32-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00]
|
||||
; CHECK32-NEXT: .LBB2_3:
|
||||
; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
|
||||
; CHECK32-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
|
||||
;
|
||||
; CHECK64-LABEL: BlockPlacementTest:
|
||||
; CHECK64: # %bb.0: # %entry
|
||||
; CHECK64-NEXT: testb $42, %sil # encoding: [0x40,0xf6,0xc6,0x2a]
|
||||
; CHECK64-NEXT: je .LBB2_3 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.1: # %land.rhs
|
||||
; CHECK64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
|
||||
; CHECK64-NEXT: testb $44, %sil # encoding: [0x40,0xf6,0xc6,0x2c]
|
||||
; CHECK64-NEXT: je baz # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB2_2: # %land.end
|
||||
; CHECK64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK64-NEXT: retq # encoding: [0xc3]
|
||||
; CHECK64-NEXT: .LBB2_3:
|
||||
; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
|
||||
; CHECK64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
|
||||
;
|
||||
; WIN64-LABEL: BlockPlacementTest:
|
||||
; WIN64: # %bb.0: # %entry
|
||||
; WIN64-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
|
||||
; WIN64-NEXT: je .LBB2_3 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.1: # %land.rhs
|
||||
; WIN64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
|
||||
; WIN64-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
|
||||
; WIN64-NEXT: je baz # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB2_2: # %land.end
|
||||
; WIN64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; WIN64-NEXT: retq # encoding: [0xc3]
|
||||
; WIN64-NEXT: .LBB2_3:
|
||||
; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
|
||||
; WIN64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
|
||||
entry:
|
||||
%and = and i32 %x, 42
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
br i1 %tobool, label %land.end, label %land.rhs
|
||||
|
||||
land.rhs:
|
||||
%and6 = and i32 %x, 44
|
||||
%tobool7 = icmp eq i32 %and6, 0
|
||||
br i1 %tobool7, label %lor.rhs, label %land.end
|
||||
|
||||
lor.rhs:
|
||||
%call = tail call x86_thiscallcc zeroext i1 @baz(i8* %this, i32 %x) #2
|
||||
br label %land.end
|
||||
|
||||
land.end:
|
||||
%0 = phi i1 [ false, %entry ], [ true, %land.rhs ], [ %call, %lor.rhs ]
|
||||
ret i1 %0
|
||||
|
||||
; Make sure machine block placement isn't confused by the conditional tail call,
|
||||
; but sees that it can fall through to the next block.
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
@ -108,11 +108,11 @@ for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo_nosize(i32 inreg %dns) {
|
||||
; SLOW-LABEL: foo_nosize:
|
||||
define void @foo_pgso(i32 inreg %dns) !prof !14 {
|
||||
; SLOW-LABEL: foo_pgso:
|
||||
; SLOW: # %bb.0: # %entry
|
||||
; SLOW-NEXT: movw $-1, %cx
|
||||
; SLOW-NEXT: .p2align 4, 0x90
|
||||
; SLOW-NEXT: xorl %ecx, %ecx
|
||||
; SLOW-NEXT: decl %ecx
|
||||
; SLOW-NEXT: .LBB4_1: # %for.body
|
||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SLOW-NEXT: movzwl %cx, %edx
|
||||
@ -122,10 +122,10 @@ define void @foo_nosize(i32 inreg %dns) {
|
||||
; SLOW-NEXT: # %bb.2: # %for.end
|
||||
; SLOW-NEXT: retl
|
||||
;
|
||||
; FAST-LABEL: foo_nosize:
|
||||
; FAST-LABEL: foo_pgso:
|
||||
; FAST: # %bb.0: # %entry
|
||||
; FAST-NEXT: movw $-1, %cx
|
||||
; FAST-NEXT: .p2align 4, 0x90
|
||||
; FAST-NEXT: xorl %ecx, %ecx
|
||||
; FAST-NEXT: decl %ecx
|
||||
; FAST-NEXT: .LBB4_1: # %for.body
|
||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; FAST-NEXT: movzwl %cx, %edx
|
||||
@ -148,11 +148,11 @@ for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bar_nosize(i32 inreg %dns) {
|
||||
; SLOW-LABEL: bar_nosize:
|
||||
define void @bar_pgso(i32 inreg %dns) !prof !14 {
|
||||
; SLOW-LABEL: bar_pgso:
|
||||
; SLOW: # %bb.0: # %entry
|
||||
; SLOW-NEXT: movw $1, %cx
|
||||
; SLOW-NEXT: .p2align 4, 0x90
|
||||
; SLOW-NEXT: xorl %ecx, %ecx
|
||||
; SLOW-NEXT: incl %ecx
|
||||
; SLOW-NEXT: .LBB5_1: # %for.body
|
||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SLOW-NEXT: movzwl %cx, %edx
|
||||
@ -162,10 +162,10 @@ define void @bar_nosize(i32 inreg %dns) {
|
||||
; SLOW-NEXT: # %bb.2: # %for.end
|
||||
; SLOW-NEXT: retl
|
||||
;
|
||||
; FAST-LABEL: bar_nosize:
|
||||
; FAST-LABEL: bar_pgso:
|
||||
; FAST: # %bb.0: # %entry
|
||||
; FAST-NEXT: movw $1, %cx
|
||||
; FAST-NEXT: .p2align 4, 0x90
|
||||
; FAST-NEXT: xorl %ecx, %ecx
|
||||
; FAST-NEXT: incl %ecx
|
||||
; FAST-NEXT: .LBB5_1: # %for.body
|
||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; FAST-NEXT: movzwl %cx, %edx
|
||||
@ -186,3 +186,99 @@ for.body:
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo_nosize(i32 inreg %dns) {
|
||||
; SLOW-LABEL: foo_nosize:
|
||||
; SLOW: # %bb.0: # %entry
|
||||
; SLOW-NEXT: movw $-1, %cx
|
||||
; SLOW-NEXT: .p2align 4, 0x90
|
||||
; SLOW-NEXT: .LBB6_1: # %for.body
|
||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SLOW-NEXT: movzwl %cx, %edx
|
||||
; SLOW-NEXT: decl %ecx
|
||||
; SLOW-NEXT: cmpl %eax, %edx
|
||||
; SLOW-NEXT: jl .LBB6_1
|
||||
; SLOW-NEXT: # %bb.2: # %for.end
|
||||
; SLOW-NEXT: retl
|
||||
;
|
||||
; FAST-LABEL: foo_nosize:
|
||||
; FAST: # %bb.0: # %entry
|
||||
; FAST-NEXT: movw $-1, %cx
|
||||
; FAST-NEXT: .p2align 4, 0x90
|
||||
; FAST-NEXT: .LBB6_1: # %for.body
|
||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; FAST-NEXT: movzwl %cx, %edx
|
||||
; FAST-NEXT: addl $-1, %ecx
|
||||
; FAST-NEXT: cmpl %eax, %edx
|
||||
; FAST-NEXT: jl .LBB6_1
|
||||
; FAST-NEXT: # %bb.2: # %for.end
|
||||
; FAST-NEXT: retl
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i.05 = phi i16 [ %dec, %for.body ], [ 0, %entry ]
|
||||
%dec = add i16 %i.05, -1
|
||||
%conv = zext i16 %dec to i32
|
||||
%cmp = icmp slt i32 %conv, %dns
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bar_nosize(i32 inreg %dns) {
|
||||
; SLOW-LABEL: bar_nosize:
|
||||
; SLOW: # %bb.0: # %entry
|
||||
; SLOW-NEXT: movw $1, %cx
|
||||
; SLOW-NEXT: .p2align 4, 0x90
|
||||
; SLOW-NEXT: .LBB7_1: # %for.body
|
||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SLOW-NEXT: movzwl %cx, %edx
|
||||
; SLOW-NEXT: incl %ecx
|
||||
; SLOW-NEXT: cmpl %eax, %edx
|
||||
; SLOW-NEXT: jl .LBB7_1
|
||||
; SLOW-NEXT: # %bb.2: # %for.end
|
||||
; SLOW-NEXT: retl
|
||||
;
|
||||
; FAST-LABEL: bar_nosize:
|
||||
; FAST: # %bb.0: # %entry
|
||||
; FAST-NEXT: movw $1, %cx
|
||||
; FAST-NEXT: .p2align 4, 0x90
|
||||
; FAST-NEXT: .LBB7_1: # %for.body
|
||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; FAST-NEXT: movzwl %cx, %edx
|
||||
; FAST-NEXT: addl $1, %ecx
|
||||
; FAST-NEXT: cmpl %eax, %edx
|
||||
; FAST-NEXT: jl .LBB7_1
|
||||
; FAST-NEXT: # %bb.2: # %for.end
|
||||
; FAST-NEXT: retl
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i.05 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
|
||||
%inc = add i16 %i.05, 1
|
||||
%conv = zext i16 %inc to i32
|
||||
%cmp = icmp slt i32 %conv, %dns
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -113,6 +113,38 @@ define <4 x float> @rcpss_full_size(<4 x float>* %a) optsize {
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @rcpss_pgso(float* %a) !prof !14 {
|
||||
; SSE-LABEL: rcpss_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rcpss (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: rcpss_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load float, float* %a
|
||||
%ins = insertelement <4 x float> undef, float %ld, i32 0
|
||||
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
|
||||
%ext = extractelement <4 x float> %res, i32 0
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
define <4 x float> @rcpss_full_pgso(<4 x float>* %a) !prof !14 {
|
||||
; SSE-LABEL: rcpss_full_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rcpss (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: rcpss_full_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load <4 x float>, <4 x float>* %a
|
||||
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @rsqrtss_size(float* %a) optsize {
|
||||
; SSE-LABEL: rsqrtss_size:
|
||||
; SSE: # %bb.0:
|
||||
@ -145,6 +177,38 @@ define <4 x float> @rsqrtss_full_size(<4 x float>* %a) optsize {
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @rsqrtss_pgso(float* %a) !prof !14 {
|
||||
; SSE-LABEL: rsqrtss_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rsqrtss (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: rsqrtss_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load float, float* %a
|
||||
%ins = insertelement <4 x float> undef, float %ld, i32 0
|
||||
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
|
||||
%ext = extractelement <4 x float> %res, i32 0
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
define <4 x float> @rsqrtss_full_pgso(<4 x float>* %a) !prof !14 {
|
||||
; SSE-LABEL: rsqrtss_full_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rsqrtss (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: rsqrtss_full_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load <4 x float>, <4 x float>* %a
|
||||
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @sqrtss_size(float* %a) optsize{
|
||||
; SSE-LABEL: sqrtss_size:
|
||||
; SSE: # %bb.0:
|
||||
@ -196,6 +260,57 @@ define <4 x float> @sqrtss_full_size_volatile(<4 x float>* %a) optsize{
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @sqrtss_pgso(float* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtss_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: sqrtss (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtss_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load float, float* %a
|
||||
%ins = insertelement <4 x float> undef, float %ld, i32 0
|
||||
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
|
||||
%ext = extractelement <4 x float> %res, i32 0
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
define <4 x float> @sqrtss_full_pgso(<4 x float>* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtss_full_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps (%rdi), %xmm0
|
||||
; SSE-NEXT: sqrtss %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtss_full_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovaps (%rdi), %xmm0
|
||||
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load <4 x float>, <4 x float>* %a
|
||||
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @sqrtss_full_pgso_volatile(<4 x float>* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtss_full_pgso_volatile:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps (%rdi), %xmm0
|
||||
; SSE-NEXT: sqrtss %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtss_full_pgso_volatile:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovaps (%rdi), %xmm0
|
||||
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load volatile <4 x float>, <4 x float>* %a
|
||||
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define double @sqrtsd_size(double* %a) optsize {
|
||||
; SSE-LABEL: sqrtsd_size:
|
||||
; SSE: # %bb.0:
|
||||
@ -247,7 +362,75 @@ define <2 x double> @sqrtsd_full_size_volatile(<2 x double>* %a) optsize {
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define double @sqrtsd_pgso(double* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtsd_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: sqrtsd (%rdi), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtsd_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load double, double* %a
|
||||
%ins = insertelement <2 x double> undef, double %ld, i32 0
|
||||
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
|
||||
%ext = extractelement <2 x double> %res, i32 0
|
||||
ret double %ext
|
||||
}
|
||||
|
||||
define <2 x double> @sqrtsd_full_pgso(<2 x double>* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtsd_full_pgso:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd (%rdi), %xmm0
|
||||
; SSE-NEXT: sqrtsd %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtsd_full_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd (%rdi), %xmm0
|
||||
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load <2 x double>, <2 x double>* %a
|
||||
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @sqrtsd_full_pgso_volatile(<2 x double>* %a) !prof !14 {
|
||||
; SSE-LABEL: sqrtsd_full_pgso_volatile:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd (%rdi), %xmm0
|
||||
; SSE-NEXT: sqrtsd %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrtsd_full_pgso_volatile:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovapd (%rdi), %xmm0
|
||||
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%ld = load volatile <2 x double>, <2 x double>* %a
|
||||
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -196,6 +196,26 @@ define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
|
||||
; X86-LABEL: var_shift_i32_pgso:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: shldl %cl, %edx, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: var_shift_i32_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edx, %ecx
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shldl %cl, %esi, %eax
|
||||
; X64-NEXT: retq
|
||||
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-FAST-LABEL: var_shift_i64:
|
||||
; X86-FAST: # %bb.0:
|
||||
@ -216,36 +236,36 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-FAST-NEXT: shll %cl, %edi
|
||||
; X86-FAST-NEXT: shldl %cl, %eax, %ebp
|
||||
; X86-FAST-NEXT: testb $32, %bl
|
||||
; X86-FAST-NEXT: je .LBB4_2
|
||||
; X86-FAST-NEXT: je .LBB5_2
|
||||
; X86-FAST-NEXT: # %bb.1:
|
||||
; X86-FAST-NEXT: movl %edi, %ebp
|
||||
; X86-FAST-NEXT: xorl %edi, %edi
|
||||
; X86-FAST-NEXT: .LBB4_2:
|
||||
; X86-FAST-NEXT: .LBB5_2:
|
||||
; X86-FAST-NEXT: movb $64, %cl
|
||||
; X86-FAST-NEXT: subb %bl, %cl
|
||||
; X86-FAST-NEXT: movl %edx, %esi
|
||||
; X86-FAST-NEXT: shrl %cl, %esi
|
||||
; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
|
||||
; X86-FAST-NEXT: testb $32, %cl
|
||||
; X86-FAST-NEXT: jne .LBB4_3
|
||||
; X86-FAST-NEXT: jne .LBB5_3
|
||||
; X86-FAST-NEXT: # %bb.4:
|
||||
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-FAST-NEXT: movl (%esp), %ecx # 4-byte Reload
|
||||
; X86-FAST-NEXT: testl %ebx, %ebx
|
||||
; X86-FAST-NEXT: jne .LBB4_6
|
||||
; X86-FAST-NEXT: jmp .LBB4_7
|
||||
; X86-FAST-NEXT: .LBB4_3:
|
||||
; X86-FAST-NEXT: jne .LBB5_6
|
||||
; X86-FAST-NEXT: jmp .LBB5_7
|
||||
; X86-FAST-NEXT: .LBB5_3:
|
||||
; X86-FAST-NEXT: movl %esi, %ecx
|
||||
; X86-FAST-NEXT: xorl %esi, %esi
|
||||
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-FAST-NEXT: testl %ebx, %ebx
|
||||
; X86-FAST-NEXT: je .LBB4_7
|
||||
; X86-FAST-NEXT: .LBB4_6:
|
||||
; X86-FAST-NEXT: je .LBB5_7
|
||||
; X86-FAST-NEXT: .LBB5_6:
|
||||
; X86-FAST-NEXT: orl %esi, %ebp
|
||||
; X86-FAST-NEXT: orl %ecx, %edi
|
||||
; X86-FAST-NEXT: movl %edi, %eax
|
||||
; X86-FAST-NEXT: movl %ebp, %edx
|
||||
; X86-FAST-NEXT: .LBB4_7:
|
||||
; X86-FAST-NEXT: .LBB5_7:
|
||||
; X86-FAST-NEXT: addl $4, %esp
|
||||
; X86-FAST-NEXT: popl %esi
|
||||
; X86-FAST-NEXT: popl %edi
|
||||
@ -279,11 +299,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-SLOW-NEXT: testb %dl, %dl
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: je .LBB4_2
|
||||
; X86-SLOW-NEXT: je .LBB5_2
|
||||
; X86-SLOW-NEXT: # %bb.1:
|
||||
; X86-SLOW-NEXT: orl %eax, %ebp
|
||||
; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: .LBB4_2:
|
||||
; X86-SLOW-NEXT: .LBB5_2:
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-SLOW-NEXT: movl %ebp, %eax
|
||||
; X86-SLOW-NEXT: movl %ebx, %ecx
|
||||
@ -294,41 +314,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-SLOW-NEXT: negb %cl
|
||||
; X86-SLOW-NEXT: shrl %cl, %edi
|
||||
; X86-SLOW-NEXT: testb %ch, %ch
|
||||
; X86-SLOW-NEXT: je .LBB4_4
|
||||
; X86-SLOW-NEXT: je .LBB5_4
|
||||
; X86-SLOW-NEXT: # %bb.3:
|
||||
; X86-SLOW-NEXT: orl %edi, %eax
|
||||
; X86-SLOW-NEXT: movl %eax, %ebp
|
||||
; X86-SLOW-NEXT: .LBB4_4:
|
||||
; X86-SLOW-NEXT: .LBB5_4:
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SLOW-NEXT: movl %eax, %edi
|
||||
; X86-SLOW-NEXT: movl %ebx, %ecx
|
||||
; X86-SLOW-NEXT: shll %cl, %edi
|
||||
; X86-SLOW-NEXT: testb $32, %bl
|
||||
; X86-SLOW-NEXT: je .LBB4_6
|
||||
; X86-SLOW-NEXT: je .LBB5_6
|
||||
; X86-SLOW-NEXT: # %bb.5:
|
||||
; X86-SLOW-NEXT: movl %edi, %ebp
|
||||
; X86-SLOW-NEXT: xorl %edi, %edi
|
||||
; X86-SLOW-NEXT: .LBB4_6:
|
||||
; X86-SLOW-NEXT: .LBB5_6:
|
||||
; X86-SLOW-NEXT: movb %dh, %cl
|
||||
; X86-SLOW-NEXT: shrl %cl, %esi
|
||||
; X86-SLOW-NEXT: testb $32, %dh
|
||||
; X86-SLOW-NEXT: jne .LBB4_7
|
||||
; X86-SLOW-NEXT: jne .LBB5_7
|
||||
; X86-SLOW-NEXT: # %bb.8:
|
||||
; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
|
||||
; X86-SLOW-NEXT: testl %ebx, %ebx
|
||||
; X86-SLOW-NEXT: jne .LBB4_10
|
||||
; X86-SLOW-NEXT: jmp .LBB4_11
|
||||
; X86-SLOW-NEXT: .LBB4_7:
|
||||
; X86-SLOW-NEXT: jne .LBB5_10
|
||||
; X86-SLOW-NEXT: jmp .LBB5_11
|
||||
; X86-SLOW-NEXT: .LBB5_7:
|
||||
; X86-SLOW-NEXT: movl %esi, %ecx
|
||||
; X86-SLOW-NEXT: xorl %esi, %esi
|
||||
; X86-SLOW-NEXT: testl %ebx, %ebx
|
||||
; X86-SLOW-NEXT: je .LBB4_11
|
||||
; X86-SLOW-NEXT: .LBB4_10:
|
||||
; X86-SLOW-NEXT: je .LBB5_11
|
||||
; X86-SLOW-NEXT: .LBB5_10:
|
||||
; X86-SLOW-NEXT: orl %esi, %ebp
|
||||
; X86-SLOW-NEXT: orl %ecx, %edi
|
||||
; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: movl %edi, %eax
|
||||
; X86-SLOW-NEXT: .LBB4_11:
|
||||
; X86-SLOW-NEXT: .LBB5_11:
|
||||
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
|
||||
; X86-SLOW-NEXT: addl $8, %esp
|
||||
; X86-SLOW-NEXT: popl %esi
|
||||
@ -503,3 +523,20 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
|
||||
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -195,6 +195,26 @@ define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
|
||||
; X86-LABEL: var_shift_i32_pgso:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: shrdl %cl, %edx, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: var_shift_i32_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edx, %ecx
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrdl %cl, %edi, %eax
|
||||
; X64-NEXT: retq
|
||||
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-FAST-LABEL: var_shift_i64:
|
||||
; X86-FAST: # %bb.0:
|
||||
@ -216,30 +236,30 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-FAST-NEXT: shll %cl, %edi
|
||||
; X86-FAST-NEXT: shldl %cl, %eax, %esi
|
||||
; X86-FAST-NEXT: testb $32, %cl
|
||||
; X86-FAST-NEXT: je .LBB4_2
|
||||
; X86-FAST-NEXT: je .LBB5_2
|
||||
; X86-FAST-NEXT: # %bb.1:
|
||||
; X86-FAST-NEXT: movl %edi, %esi
|
||||
; X86-FAST-NEXT: xorl %edi, %edi
|
||||
; X86-FAST-NEXT: .LBB4_2:
|
||||
; X86-FAST-NEXT: .LBB5_2:
|
||||
; X86-FAST-NEXT: movl %edx, %ebp
|
||||
; X86-FAST-NEXT: movl %ebx, %ecx
|
||||
; X86-FAST-NEXT: shrl %cl, %ebp
|
||||
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-FAST-NEXT: shrdl %cl, %edx, %eax
|
||||
; X86-FAST-NEXT: testb $32, %bl
|
||||
; X86-FAST-NEXT: je .LBB4_4
|
||||
; X86-FAST-NEXT: je .LBB5_4
|
||||
; X86-FAST-NEXT: # %bb.3:
|
||||
; X86-FAST-NEXT: movl %ebp, %eax
|
||||
; X86-FAST-NEXT: xorl %ebp, %ebp
|
||||
; X86-FAST-NEXT: .LBB4_4:
|
||||
; X86-FAST-NEXT: .LBB5_4:
|
||||
; X86-FAST-NEXT: testl %ebx, %ebx
|
||||
; X86-FAST-NEXT: je .LBB4_6
|
||||
; X86-FAST-NEXT: je .LBB5_6
|
||||
; X86-FAST-NEXT: # %bb.5:
|
||||
; X86-FAST-NEXT: orl %ebp, %esi
|
||||
; X86-FAST-NEXT: orl %eax, %edi
|
||||
; X86-FAST-NEXT: movl %edi, (%esp) # 4-byte Spill
|
||||
; X86-FAST-NEXT: movl %esi, %edx
|
||||
; X86-FAST-NEXT: .LBB4_6:
|
||||
; X86-FAST-NEXT: .LBB5_6:
|
||||
; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
|
||||
; X86-FAST-NEXT: addl $4, %esp
|
||||
; X86-FAST-NEXT: popl %esi
|
||||
@ -274,11 +294,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-SLOW-NEXT: shrl %cl, %edi
|
||||
; X86-SLOW-NEXT: testb %ch, %ch
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-SLOW-NEXT: je .LBB4_2
|
||||
; X86-SLOW-NEXT: je .LBB5_2
|
||||
; X86-SLOW-NEXT: # %bb.1:
|
||||
; X86-SLOW-NEXT: orl %edi, %edx
|
||||
; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: .LBB4_2:
|
||||
; X86-SLOW-NEXT: .LBB5_2:
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-SLOW-NEXT: movl %ebx, %ecx
|
||||
; X86-SLOW-NEXT: shrl %cl, %edx
|
||||
@ -290,41 +310,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-SLOW-NEXT: shll %cl, %edi
|
||||
; X86-SLOW-NEXT: testb %ah, %ah
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-SLOW-NEXT: je .LBB4_4
|
||||
; X86-SLOW-NEXT: je .LBB5_4
|
||||
; X86-SLOW-NEXT: # %bb.3:
|
||||
; X86-SLOW-NEXT: orl %edx, %edi
|
||||
; X86-SLOW-NEXT: movl %edi, %ebp
|
||||
; X86-SLOW-NEXT: .LBB4_4:
|
||||
; X86-SLOW-NEXT: .LBB5_4:
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-SLOW-NEXT: movl %ebx, %ecx
|
||||
; X86-SLOW-NEXT: shrl %cl, %edi
|
||||
; X86-SLOW-NEXT: testb $32, %bl
|
||||
; X86-SLOW-NEXT: je .LBB4_6
|
||||
; X86-SLOW-NEXT: je .LBB5_6
|
||||
; X86-SLOW-NEXT: # %bb.5:
|
||||
; X86-SLOW-NEXT: movl %edi, %ebp
|
||||
; X86-SLOW-NEXT: xorl %edi, %edi
|
||||
; X86-SLOW-NEXT: .LBB4_6:
|
||||
; X86-SLOW-NEXT: .LBB5_6:
|
||||
; X86-SLOW-NEXT: movl %eax, %ecx
|
||||
; X86-SLOW-NEXT: shll %cl, %esi
|
||||
; X86-SLOW-NEXT: testb $32, %al
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-SLOW-NEXT: jne .LBB4_7
|
||||
; X86-SLOW-NEXT: jne .LBB5_7
|
||||
; X86-SLOW-NEXT: # %bb.8:
|
||||
; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
|
||||
; X86-SLOW-NEXT: testl %ebx, %ebx
|
||||
; X86-SLOW-NEXT: jne .LBB4_10
|
||||
; X86-SLOW-NEXT: jmp .LBB4_11
|
||||
; X86-SLOW-NEXT: .LBB4_7:
|
||||
; X86-SLOW-NEXT: jne .LBB5_10
|
||||
; X86-SLOW-NEXT: jmp .LBB5_11
|
||||
; X86-SLOW-NEXT: .LBB5_7:
|
||||
; X86-SLOW-NEXT: movl %esi, %eax
|
||||
; X86-SLOW-NEXT: xorl %esi, %esi
|
||||
; X86-SLOW-NEXT: testl %ebx, %ebx
|
||||
; X86-SLOW-NEXT: je .LBB4_11
|
||||
; X86-SLOW-NEXT: .LBB4_10:
|
||||
; X86-SLOW-NEXT: je .LBB5_11
|
||||
; X86-SLOW-NEXT: .LBB5_10:
|
||||
; X86-SLOW-NEXT: orl %ebp, %esi
|
||||
; X86-SLOW-NEXT: orl %edi, %eax
|
||||
; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: movl %eax, %edx
|
||||
; X86-SLOW-NEXT: .LBB4_11:
|
||||
; X86-SLOW-NEXT: .LBB5_11:
|
||||
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-SLOW-NEXT: addl $8, %esp
|
||||
; X86-SLOW-NEXT: popl %esi
|
||||
@ -498,3 +518,20 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
|
||||
%tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 7)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -1983,6 +1983,80 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize {
|
||||
ret float %x230
|
||||
}
|
||||
|
||||
define float @hadd32_4_pgso(<4 x float> %x225) !prof !14 {
|
||||
; SSE3-LABEL: hadd32_4_pgso:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||
; SSE3-NEXT: addps %xmm0, %xmm1
|
||||
; SSE3-NEXT: haddps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: hadd32_4_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%x227 = fadd <4 x float> %x225, %x226
|
||||
%x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%x229 = fadd <4 x float> %x227, %x228
|
||||
%x230 = extractelement <4 x float> %x229, i32 0
|
||||
ret float %x230
|
||||
}
|
||||
|
||||
define float @hadd32_8_pgso(<8 x float> %x225) !prof !14 {
|
||||
; SSE3-LABEL: hadd32_8_pgso:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||
; SSE3-NEXT: addps %xmm0, %xmm1
|
||||
; SSE3-NEXT: haddps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: hadd32_8_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
%x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%x227 = fadd <8 x float> %x225, %x226
|
||||
%x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%x229 = fadd <8 x float> %x227, %x228
|
||||
%x230 = extractelement <8 x float> %x229, i32 0
|
||||
ret float %x230
|
||||
}
|
||||
|
||||
define float @hadd32_16_pgso(<16 x float> %x225) !prof !14 {
|
||||
; SSE3-LABEL: hadd32_16_pgso:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||
; SSE3-NEXT: addps %xmm0, %xmm1
|
||||
; SSE3-NEXT: haddps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: hadd32_16_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
%x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%x227 = fadd <16 x float> %x225, %x226
|
||||
%x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%x229 = fadd <16 x float> %x227, %x228
|
||||
%x230 = extractelement <16 x float> %x229, i32 0
|
||||
ret float %x230
|
||||
}
|
||||
|
||||
define float @partial_reduction_fadd_v8f32(<8 x float> %x) {
|
||||
; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
@ -2115,3 +2189,20 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) {
|
||||
%r = extractelement <16 x float> %x0123, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -73,6 +73,68 @@ if.end: ; preds = %if.then, %entry
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Test PGSO to make sure immediates with multiple users don't get pulled in to
|
||||
; instructions.
|
||||
define i32 @foo_pgso() !prof !14 {
|
||||
; X86-LABEL: foo_pgso:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl $1234, %eax # imm = 0x4D2
|
||||
; X86-NEXT: movl %eax, a
|
||||
; X86-NEXT: movl %eax, b
|
||||
; X86-NEXT: movl $12, %eax
|
||||
; X86-NEXT: movl %eax, c
|
||||
; X86-NEXT: cmpl %eax, e
|
||||
; X86-NEXT: jne .LBB1_2
|
||||
; X86-NEXT: # %bb.1: # %if.then
|
||||
; X86-NEXT: movl $1, x
|
||||
; X86-NEXT: .LBB1_2: # %if.end
|
||||
; X86-NEXT: movl $1234, f # imm = 0x4D2
|
||||
; X86-NEXT: movl $555, %eax # imm = 0x22B
|
||||
; X86-NEXT: movl %eax, h
|
||||
; X86-NEXT: addl %eax, i
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: foo_pgso:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movl $1234, %eax # imm = 0x4D2
|
||||
; X64-NEXT: movl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: movl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: movl $12, %eax
|
||||
; X64-NEXT: movl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: cmpl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: jne .LBB1_2
|
||||
; X64-NEXT: # %bb.1: # %if.then
|
||||
; X64-NEXT: movl $1, {{.*}}(%rip)
|
||||
; X64-NEXT: .LBB1_2: # %if.end
|
||||
; X64-NEXT: movl $1234, {{.*}}(%rip) # imm = 0x4D2
|
||||
; X64-NEXT: movl $555, %eax # imm = 0x22B
|
||||
; X64-NEXT: movl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: addl %eax, {{.*}}(%rip)
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
store i32 1234, i32* @a
|
||||
store i32 1234, i32* @b
|
||||
store i32 12, i32* @c
|
||||
%0 = load i32, i32* @e
|
||||
%cmp = icmp eq i32 %0, 12
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
store i32 1, i32* @x
|
||||
br label %if.end
|
||||
|
||||
; New block.. Make sure 1234 isn't live across basic blocks from before.
|
||||
if.end: ; preds = %if.then, %entry
|
||||
store i32 1234, i32* @f
|
||||
store i32 555, i32* @h
|
||||
%1 = load i32, i32* @i
|
||||
%add1 = add nsw i32 %1, 555
|
||||
store i32 %add1, i32* @i
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Test -O2 to make sure that all immediates get pulled in to their users.
|
||||
define i32 @foo2() {
|
||||
; X86-LABEL: foo2:
|
||||
@ -124,3 +186,47 @@ entry:
|
||||
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @AA, i32 0, i32 0), i8 33, i32 24, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; memset gets lowered in DAG. Constant merging should hoist all the
|
||||
; immediates used to store to the individual memory locations. Make
|
||||
; sure we don't directly store the immediates.
|
||||
define void @foomemset_pgso() !prof !14 {
|
||||
; X86-LABEL: foomemset_pgso:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl $555819297, %eax # imm = 0x21212121
|
||||
; X86-NEXT: movl %eax, AA+20
|
||||
; X86-NEXT: movl %eax, AA+16
|
||||
; X86-NEXT: movl %eax, AA+12
|
||||
; X86-NEXT: movl %eax, AA+8
|
||||
; X86-NEXT: movl %eax, AA+4
|
||||
; X86-NEXT: movl %eax, AA
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: foomemset_pgso:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movabsq $2387225703656530209, %rax # imm = 0x2121212121212121
|
||||
; X64-NEXT: movq %rax, AA+{{.*}}(%rip)
|
||||
; X64-NEXT: movq %rax, AA+{{.*}}(%rip)
|
||||
; X64-NEXT: movq %rax, {{.*}}(%rip)
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @AA, i32 0, i32 0), i8 33, i32 24, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -19,6 +19,19 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 {
|
||||
; CHECK-LABEL: imm_multiple_users_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq $-1, %rax
|
||||
; CHECK-NEXT: movq %rax, (%rsi)
|
||||
; CHECK-NEXT: cmpq %rax, %rdi
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
store i64 -1, i64* %b, align 8
|
||||
%cmp = icmp eq i64 %a, -1
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
|
||||
|
||||
; Inlined memsets requiring multiple same-sized stores should be lowered using
|
||||
@ -34,3 +47,31 @@ define void @memset_zero(i8* noalias nocapture %D) optsize {
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %D, i8 0, i64 15, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memset_zero_pgso(i8* noalias nocapture %D) !prof !14 {
|
||||
; CHECK-LABEL: memset_zero_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: movq %rax, 7(%rdi)
|
||||
; CHECK-NEXT: movq %rax, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %D, i8 0, i64 15, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -269,6 +269,35 @@ exit:
|
||||
|
||||
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
|
||||
|
||||
; CHECK-LABEL: slightly_more_involved_2_pgso:
|
||||
; CHECK-NOT: jmp .LBB6_1
|
||||
; CHECK: .LBB6_1:
|
||||
; CHECK-NEXT: callq body
|
||||
|
||||
define void @slightly_more_involved_2_pgso() norecurse nounwind readnone uwtable !prof !14 {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @body()
|
||||
%t0 = call i32 @get()
|
||||
%t1 = icmp slt i32 %t0, 2
|
||||
br i1 %t1, label %block_a, label %bb
|
||||
|
||||
bb:
|
||||
%t2 = call i32 @get()
|
||||
%t3 = icmp slt i32 %t2, 99
|
||||
br i1 %t3, label %exit, label %loop
|
||||
|
||||
block_a:
|
||||
call void @bar99()
|
||||
br label %loop
|
||||
|
||||
exit:
|
||||
call void @exit()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar99() nounwind
|
||||
declare void @bar100() nounwind
|
||||
declare void @bar101() nounwind
|
||||
@ -281,3 +310,20 @@ declare i32 @get() nounwind
|
||||
declare void @block_a_true_func() nounwind
|
||||
declare void @block_a_false_func() nounwind
|
||||
declare void @block_a_merge_func() nounwind
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -30,6 +30,21 @@ entry:
|
||||
; CHECK64-NEXT: retq
|
||||
}
|
||||
|
||||
define i32 @one32_pgso() !prof !14 {
|
||||
entry:
|
||||
ret i32 1
|
||||
|
||||
; CHECK32-LABEL: one32_pgso:
|
||||
; CHECK32: xorl %eax, %eax
|
||||
; CHECK32-NEXT: incl %eax
|
||||
; CHECK32-NEXT: retl
|
||||
|
||||
; FIXME: Figure out the best approach in 64-bit mode.
|
||||
; CHECK64-LABEL: one32_pgso:
|
||||
; CHECK64: movl $1, %eax
|
||||
; CHECK64-NEXT: retq
|
||||
}
|
||||
|
||||
define i32 @one32_minsize() minsize {
|
||||
entry:
|
||||
ret i32 1
|
||||
@ -107,6 +122,16 @@ entry:
|
||||
; CHECK32-NEXT: retl
|
||||
}
|
||||
|
||||
define i32 @minus_one32_pgso() !prof !14 {
|
||||
entry:
|
||||
ret i32 -1
|
||||
|
||||
; CHECK32-LABEL: minus_one32_pgso:
|
||||
; CHECK32: xorl %eax, %eax
|
||||
; CHECK32-NEXT: decl %eax
|
||||
; CHECK32-NEXT: retl
|
||||
}
|
||||
|
||||
define i32 @minus_one32_minsize() minsize {
|
||||
entry:
|
||||
ret i32 -1
|
||||
@ -140,6 +165,28 @@ entry:
|
||||
; CHECK32-NEXT: retl
|
||||
}
|
||||
|
||||
define i16 @one16_pgso() !prof !14 {
|
||||
entry:
|
||||
ret i16 1
|
||||
|
||||
; CHECK32-LABEL: one16_pgso:
|
||||
; CHECK32: xorl %eax, %eax
|
||||
; CHECK32-NEXT: incl %eax
|
||||
; CHECK32-NEXT: # kill
|
||||
; CHECK32-NEXT: retl
|
||||
}
|
||||
|
||||
define i16 @minus_one16_pgso() !prof !14 {
|
||||
entry:
|
||||
ret i16 -1
|
||||
|
||||
; CHECK32-LABEL: minus_one16_pgso:
|
||||
; CHECK32: xorl %eax, %eax
|
||||
; CHECK32-NEXT: decl %eax
|
||||
; CHECK32-NEXT: # kill
|
||||
; CHECK32-NEXT: retl
|
||||
}
|
||||
|
||||
define i32 @minus_five32() minsize {
|
||||
entry:
|
||||
ret i32 -5
|
||||
@ -213,4 +260,72 @@ entry:
|
||||
; CHECK32: retl
|
||||
}
|
||||
|
||||
define i32 @rematerialize_minus_one_pgso() !prof !14 {
|
||||
entry:
|
||||
; Materialize -1 (thiscall forces it into %ecx).
|
||||
tail call x86_thiscallcc void @f(i32 -1)
|
||||
|
||||
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
|
||||
; spilling it to the stack.
|
||||
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
|
||||
|
||||
; -1 should be re-materialized here instead of getting spilled above.
|
||||
ret i32 -1
|
||||
|
||||
; CHECK32-LABEL: rematerialize_minus_one_pgso
|
||||
; CHECK32: xorl %ecx, %ecx
|
||||
; CHECK32-NEXT: decl %ecx
|
||||
; CHECK32: calll
|
||||
; CHECK32: xorl %eax, %eax
|
||||
; CHECK32-NEXT: decl %eax
|
||||
; CHECK32-NOT: %eax
|
||||
; CHECK32: retl
|
||||
}
|
||||
|
||||
define i32 @rematerialize_minus_one_eflags_pgso(i32 %x) !prof !14 {
|
||||
entry:
|
||||
; Materialize -1 (thiscall forces it into %ecx).
|
||||
tail call x86_thiscallcc void @f(i32 -1)
|
||||
|
||||
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
|
||||
; spilling it to the stack.
|
||||
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
|
||||
|
||||
; Define eflags.
|
||||
%a = icmp ne i32 %x, 123
|
||||
%b = zext i1 %a to i32
|
||||
; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
|
||||
; It must therefore not use the xor-dec lowering.
|
||||
%c = select i1 %a, i32 %b, i32 -1
|
||||
ret i32 %c
|
||||
|
||||
; CHECK32-LABEL: rematerialize_minus_one_eflags_pgso
|
||||
; CHECK32: xorl %ecx, %ecx
|
||||
; CHECK32-NEXT: decl %ecx
|
||||
; CHECK32: calll
|
||||
; CHECK32: cmpl
|
||||
; CHECK32: setne
|
||||
; CHECK32-NOT: xorl
|
||||
; CHECK32: movl $-1
|
||||
; CHECK32: cmov
|
||||
; CHECK32: retl
|
||||
}
|
||||
|
||||
declare x86_thiscallcc void @f(i32)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
1064
test/CodeGen/X86/memcmp-pgso.ll
Normal file
1064
test/CodeGen/X86/memcmp-pgso.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -139,6 +139,36 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test3_pgso(i8* nocapture %A, i8* nocapture %B) nounwind noredzone !prof !14 {
|
||||
; LINUX-LABEL: test3_pgso:
|
||||
; LINUX: # %bb.0: # %entry
|
||||
; LINUX-NEXT: movl $64, %edx
|
||||
; LINUX-NEXT: jmp memcpy # TAILCALL
|
||||
;
|
||||
; DARWIN-LABEL: test3_pgso:
|
||||
; DARWIN: ## %bb.0: ## %entry
|
||||
; DARWIN-NEXT: movq 56(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 56(%rdi)
|
||||
; DARWIN-NEXT: movq 48(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 48(%rdi)
|
||||
; DARWIN-NEXT: movq 40(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 40(%rdi)
|
||||
; DARWIN-NEXT: movq 32(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 32(%rdi)
|
||||
; DARWIN-NEXT: movq 24(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 24(%rdi)
|
||||
; DARWIN-NEXT: movq 16(%rsi), %rax
|
||||
; DARWIN-NEXT: movq %rax, 16(%rdi)
|
||||
; DARWIN-NEXT: movq (%rsi), %rax
|
||||
; DARWIN-NEXT: movq 8(%rsi), %rcx
|
||||
; DARWIN-NEXT: movq %rcx, 8(%rdi)
|
||||
; DARWIN-NEXT: movq %rax, (%rdi)
|
||||
; DARWIN-NEXT: retq
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
|
||||
; DARWIN-LABEL: test3_minsize:
|
||||
; DARWIN: ## %bb.0:
|
||||
@ -506,3 +536,20 @@ define void @addrspace256(i8 addrspace(256)* %a, i8 addrspace(256)* %b) nounwind
|
||||
tail call void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* align 8 %a, i8 addrspace(256)* align 8 %b, i64 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -86,6 +86,39 @@ define double @pow_wrapper_optsize(double %a) optsize {
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @pow_wrapper_pgso(double %a) !prof !14 {
|
||||
; X86-X87-LABEL: pow_wrapper_pgso:
|
||||
; X86-X87: # %bb.0:
|
||||
; X86-X87-NEXT: subl $12, %esp
|
||||
; X86-X87-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; X86-X87-NEXT: fstpl (%esp)
|
||||
; X86-X87-NEXT: movl $15, {{[0-9]+}}(%esp)
|
||||
; X86-X87-NEXT: calll __powidf2
|
||||
; X86-X87-NEXT: addl $12, %esp
|
||||
; X86-X87-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-X87-NEXT: retl
|
||||
;
|
||||
; X86-SSE-LABEL: pow_wrapper_pgso:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: subl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-SSE-NEXT: movsd %xmm0, (%esp)
|
||||
; X86-SSE-NEXT: movl $15, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: calll __powidf2
|
||||
; X86-SSE-NEXT: addl $12, %esp
|
||||
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pow_wrapper_pgso:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl $15, %edi
|
||||
; X64-NEXT: jmp __powidf2 # TAILCALL
|
||||
%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @pow_wrapper_minsize(double %a) minsize {
|
||||
; X86-X87-LABEL: pow_wrapper_minsize:
|
||||
; X86-X87: # %bb.0:
|
||||
@ -124,3 +157,19 @@ define double @pow_wrapper_minsize(double %a) minsize {
|
||||
|
||||
declare double @llvm.powi.f64(double, i32) nounwind readonly
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -252,3 +252,60 @@ define double @test12(double* %xptr) nounwind optsize {
|
||||
%call = tail call double @trunc(double %x) nounwind readnone
|
||||
ret double %call
|
||||
}
|
||||
|
||||
define float @test11_pgso(float* %xptr) nounwind !prof !14 {
|
||||
; CHECK-SSE-LABEL: test11_pgso:
|
||||
; CHECK-SSE: ## %bb.0:
|
||||
; CHECK-SSE-NEXT: roundss $11, (%rdi), %xmm0
|
||||
; CHECK-SSE-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX-LABEL: test11_pgso:
|
||||
; CHECK-AVX: ## %bb.0:
|
||||
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-AVX-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX512-LABEL: test11_pgso:
|
||||
; CHECK-AVX512: ## %bb.0:
|
||||
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-AVX512-NEXT: retq
|
||||
%x = load float, float* %xptr
|
||||
%call = tail call float @truncf(float %x) nounwind readnone
|
||||
ret float %call
|
||||
}
|
||||
|
||||
define double @test12_pgso(double* %xptr) nounwind !prof !14 {
|
||||
; CHECK-SSE-LABEL: test12_pgso:
|
||||
; CHECK-SSE: ## %bb.0:
|
||||
; CHECK-SSE-NEXT: roundsd $11, (%rdi), %xmm0
|
||||
; CHECK-SSE-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX-LABEL: test12_pgso:
|
||||
; CHECK-AVX: ## %bb.0:
|
||||
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-AVX-NEXT: retq
|
||||
;
|
||||
; CHECK-AVX512-LABEL: test12_pgso:
|
||||
; CHECK-AVX512: ## %bb.0:
|
||||
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-AVX512-NEXT: retq
|
||||
%x = load double, double* %xptr
|
||||
%call = tail call double @trunc(double %x) nounwind readnone
|
||||
ret double %call
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
321
test/CodeGen/X86/shrink-compare-pgso.ll
Normal file
321
test/CodeGen/X86/shrink-compare-pgso.ll
Normal file
@ -0,0 +1,321 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
|
||||
|
||||
declare void @bar()
|
||||
|
||||
define void @test1(i32* nocapture %X) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $47, (%rdi)
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%tmp1 = load i32, i32* %X, align 4
|
||||
%and = and i32 %tmp1, 255
|
||||
%cmp = icmp eq i32 %and, 47
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test2(i32 %X) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $47, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%and = and i32 %X, 255
|
||||
%cmp = icmp eq i32 %and, 47
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test3(i32 %X) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $-1, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%and = and i32 %X, 255
|
||||
%cmp = icmp eq i32 %and, 255
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR16083
|
||||
define i1 @test4(i64 %a, i32 %b) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movb $1, %al
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: je .LBB3_1
|
||||
; CHECK-NEXT: # %bb.2: # %lor.end
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB3_1: # %lor.rhs
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%tobool = icmp ne i32 %b, 0
|
||||
br i1 %tobool, label %lor.end, label %lor.rhs
|
||||
|
||||
lor.rhs: ; preds = %entry
|
||||
%and = and i64 0, %a
|
||||
%tobool1 = icmp ne i64 %and, 0
|
||||
br label %lor.end
|
||||
|
||||
lor.end: ; preds = %lor.rhs, %entry
|
||||
%p = phi i1 [ true, %entry ], [ %tobool1, %lor.rhs ]
|
||||
ret i1 %p
|
||||
}
|
||||
|
||||
@x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4
|
||||
|
||||
; PR16551
|
||||
define void @test5(i32 %X) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movzbl x+{{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: shll $16, %eax
|
||||
; CHECK-NEXT: movzwl x+{{.*}}(%rip), %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: cmpl $1, %ecx
|
||||
; CHECK-NEXT: jne bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
|
||||
%bf.lshr = lshr i56 %bf.load, 32
|
||||
%bf.cast = trunc i56 %bf.lshr to i32
|
||||
%cmp = icmp ne i32 %bf.cast, 1
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test2_1(i32 %X) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test2_1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: cmpl $256, %eax # imm = 0x100
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%and = and i32 %X, 255
|
||||
%cmp = icmp eq i32 %and, 256
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_1(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $1, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, 1
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_47(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_47:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $47, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, 47
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_127(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_127:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $127, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, 127
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_neg1(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_neg1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $-1, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, -1
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_neg2(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_neg2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $-2, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, -2
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_neg127(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_neg127:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $-127, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, -127
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_neg128(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_neg128:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cmpb $-128, %dil
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, -128
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_sext_i8_icmp_255(i8 %x) nounwind !prof !14 {
|
||||
; CHECK-LABEL: test_sext_i8_icmp_255:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movb $1, %al
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je bar # TAILCALL
|
||||
; CHECK-NEXT: # %bb.1: # %if.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sext = sext i8 %x to i32
|
||||
%cmp = icmp eq i32 %sext, 255
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @bar() nounwind
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
@ -54,6 +54,26 @@ define i32 @dec_size(i32 %x) optsize {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @inc_pgso(i32 %x) !prof !14 {
|
||||
; CHECK-LABEL: inc_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: incl %eax
|
||||
; CHECK-NEXT: retl
|
||||
%r = add i32 %x, 1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @dec_pgso(i32 %x) !prof !14 {
|
||||
; CHECK-LABEL: dec_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: decl %eax
|
||||
; CHECK-NEXT: retl
|
||||
%r = add i32 %x, -1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
|
||||
declare void @other(i32* ) nounwind;
|
||||
|
||||
@ -62,20 +82,20 @@ define void @cond_ae_to_cond_ne(i32* %p) nounwind {
|
||||
; INCDEC: # %bb.0: # %entry
|
||||
; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; INCDEC-NEXT: incl (%eax)
|
||||
; INCDEC-NEXT: jne .LBB4_1
|
||||
; INCDEC-NEXT: jne .LBB6_1
|
||||
; INCDEC-NEXT: # %bb.2: # %if.end4
|
||||
; INCDEC-NEXT: jmp other # TAILCALL
|
||||
; INCDEC-NEXT: .LBB4_1: # %return
|
||||
; INCDEC-NEXT: .LBB6_1: # %return
|
||||
; INCDEC-NEXT: retl
|
||||
;
|
||||
; ADD-LABEL: cond_ae_to_cond_ne:
|
||||
; ADD: # %bb.0: # %entry
|
||||
; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; ADD-NEXT: addl $1, (%eax)
|
||||
; ADD-NEXT: jne .LBB4_1
|
||||
; ADD-NEXT: jne .LBB6_1
|
||||
; ADD-NEXT: # %bb.2: # %if.end4
|
||||
; ADD-NEXT: jmp other # TAILCALL
|
||||
; ADD-NEXT: .LBB4_1: # %return
|
||||
; ADD-NEXT: .LBB6_1: # %return
|
||||
; ADD-NEXT: retl
|
||||
entry:
|
||||
%t0 = load i32, i32* %p, align 8
|
||||
@ -109,10 +129,10 @@ define void @test_tail_call(i32* %ptr) nounwind {
|
||||
; INCDEC-NEXT: incb a
|
||||
; INCDEC-NEXT: sete d
|
||||
; INCDEC-NEXT: testb %al, %al
|
||||
; INCDEC-NEXT: jne .LBB5_2
|
||||
; INCDEC-NEXT: jne .LBB7_2
|
||||
; INCDEC-NEXT: # %bb.1: # %then
|
||||
; INCDEC-NEXT: jmp external_a # TAILCALL
|
||||
; INCDEC-NEXT: .LBB5_2: # %else
|
||||
; INCDEC-NEXT: .LBB7_2: # %else
|
||||
; INCDEC-NEXT: jmp external_b # TAILCALL
|
||||
;
|
||||
; ADD-LABEL: test_tail_call:
|
||||
@ -123,10 +143,10 @@ define void @test_tail_call(i32* %ptr) nounwind {
|
||||
; ADD-NEXT: addb $1, a
|
||||
; ADD-NEXT: sete d
|
||||
; ADD-NEXT: testb %al, %al
|
||||
; ADD-NEXT: jne .LBB5_2
|
||||
; ADD-NEXT: jne .LBB7_2
|
||||
; ADD-NEXT: # %bb.1: # %then
|
||||
; ADD-NEXT: jmp external_a # TAILCALL
|
||||
; ADD-NEXT: .LBB5_2: # %else
|
||||
; ADD-NEXT: .LBB7_2: # %else
|
||||
; ADD-NEXT: jmp external_b # TAILCALL
|
||||
entry:
|
||||
%val = load i32, i32* %ptr
|
||||
@ -152,3 +172,19 @@ else:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -17,6 +17,17 @@ define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
|
||||
ret <2 x double> %add
|
||||
}
|
||||
|
||||
define <2 x double> @splat_v2f64_pgso(<2 x double> %x) !prof !14 {
|
||||
; CHECK-LABEL: splat_v2f64_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
|
||||
; CHECK-NEXT: # xmm1 = mem[0,0]
|
||||
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%add = fadd <2 x double> %x, <double 1.0, double 1.0>
|
||||
ret <2 x double> %add
|
||||
}
|
||||
|
||||
define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
|
||||
; CHECK-LABEL: splat_v4f64:
|
||||
; CHECK: # %bb.0:
|
||||
@ -27,6 +38,16 @@ define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
|
||||
ret <4 x double> %add
|
||||
}
|
||||
|
||||
define <4 x double> @splat_v4f64_pgso(<4 x double> %x) !prof !14 {
|
||||
; CHECK-LABEL: splat_v4f64_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
|
||||
ret <4 x double> %add
|
||||
}
|
||||
|
||||
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
|
||||
; CHECK-LABEL: splat_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
@ -37,6 +58,16 @@ define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
|
||||
ret <4 x float> %add
|
||||
}
|
||||
|
||||
define <4 x float> @splat_v4f32_pgso(<4 x float> %x) !prof !14 {
|
||||
; CHECK-LABEL: splat_v4f32_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
ret <4 x float> %add
|
||||
}
|
||||
|
||||
define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
|
||||
; CHECK-LABEL: splat_v8f32:
|
||||
; CHECK: # %bb.0:
|
||||
@ -47,6 +78,16 @@ define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
|
||||
ret <8 x float> %add
|
||||
}
|
||||
|
||||
define <8 x float> @splat_v8f32_pgso(<8 x float> %x) !prof !14 {
|
||||
; CHECK-LABEL: splat_v8f32_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
ret <8 x float> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
|
||||
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
|
||||
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
|
||||
@ -66,6 +107,23 @@ define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
|
||||
ret <2 x i64> %add
|
||||
}
|
||||
|
||||
define <2 x i64> @splat_v2i64_pgso(<2 x i64> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v2i64_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [2,2]
|
||||
; AVX-NEXT: # xmm1 = mem[0,0]
|
||||
; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v2i64_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <2 x i64> %x, <i64 2, i64 2>
|
||||
ret <2 x i64> %add
|
||||
}
|
||||
|
||||
; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
|
||||
; and then we fake it: use vmovddup to splat 64-bit value.
|
||||
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
|
||||
@ -88,6 +146,26 @@ define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
|
||||
ret <4 x i64> %add
|
||||
}
|
||||
|
||||
define <4 x i64> @splat_v4i64_pgso(<4 x i64> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v4i64_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [2,2]
|
||||
; AVX-NEXT: # xmm2 = mem[0,0]
|
||||
; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v4i64_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
|
||||
ret <4 x i64> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
|
||||
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
|
||||
; AVX-LABEL: splat_v4i32:
|
||||
@ -105,6 +183,22 @@ define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
|
||||
ret <4 x i32> %add
|
||||
}
|
||||
|
||||
define <4 x i32> @splat_v4i32_pgso(<4 x i32> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v4i32_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v4i32_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
|
||||
ret <4 x i32> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
|
||||
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
|
||||
; AVX-LABEL: splat_v8i32:
|
||||
@ -125,6 +219,25 @@ define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
|
||||
ret <8 x i32> %add
|
||||
}
|
||||
|
||||
define <8 x i32> @splat_v8i32_pgso(<8 x i32> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v8i32_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
|
||||
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v8i32_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
ret <8 x i32> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
|
||||
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
|
||||
; AVX-LABEL: splat_v8i16:
|
||||
@ -141,6 +254,21 @@ define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
|
||||
ret <8 x i16> %add
|
||||
}
|
||||
|
||||
define <8 x i16> @splat_v8i16_pgso(<8 x i16> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v8i16_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v8i16_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
ret <8 x i16> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
|
||||
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
|
||||
; AVX-LABEL: splat_v16i16:
|
||||
@ -161,6 +289,25 @@ define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
|
||||
ret <16 x i16> %add
|
||||
}
|
||||
|
||||
define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v16i16_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
|
||||
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v16i16_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
ret <16 x i16> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
|
||||
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
|
||||
; AVX-LABEL: splat_v16i8:
|
||||
@ -177,6 +324,21 @@ define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
|
||||
ret <16 x i8> %add
|
||||
}
|
||||
|
||||
define <16 x i8> @splat_v16i8_pgso(<16 x i8> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v16i8_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v16i8_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
|
||||
ret <16 x i8> %add
|
||||
}
|
||||
|
||||
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
|
||||
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
|
||||
; AVX-LABEL: splat_v32i8:
|
||||
@ -197,6 +359,25 @@ define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
|
||||
ret <32 x i8> %add
|
||||
}
|
||||
|
||||
define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
|
||||
; AVX-LABEL: splat_v32i8_pgso:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v32i8_pgso:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
|
||||
ret <32 x i8> %add
|
||||
}
|
||||
|
||||
; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
|
||||
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
|
||||
; loadi64 with multiple uses.
|
||||
@ -238,3 +419,20 @@ entry:
|
||||
|
||||
attributes #0 = { optsize }
|
||||
attributes #1 = { minsize }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -19,6 +19,23 @@ entry:
|
||||
|
||||
}
|
||||
|
||||
define void @zero_pgso(i32* %p) !prof !14 {
|
||||
; CHECK32-LABEL: zero_pgso:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK32-NEXT: movl $0, (%eax)
|
||||
; CHECK32-NEXT: retl
|
||||
;
|
||||
; CHECK64-LABEL: zero_pgso:
|
||||
; CHECK64: # %bb.0: # %entry
|
||||
; CHECK64-NEXT: movl $0, (%rdi)
|
||||
; CHECK64-NEXT: retq
|
||||
entry:
|
||||
store i32 0, i32* %p
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
define void @minus_one_optsize(i32* %p) optsize {
|
||||
; CHECK32-LABEL: minus_one_optsize:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
@ -36,6 +53,22 @@ entry:
|
||||
|
||||
}
|
||||
|
||||
define void @minus_one_pgso(i32* %p) !prof !14 {
|
||||
; CHECK32-LABEL: minus_one_pgso:
|
||||
; CHECK32: # %bb.0: # %entry
|
||||
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK32-NEXT: movl $-1, (%eax)
|
||||
; CHECK32-NEXT: retl
|
||||
;
|
||||
; CHECK64-LABEL: minus_one_pgso:
|
||||
; CHECK64: # %bb.0: # %entry
|
||||
; CHECK64-NEXT: movl $-1, (%rdi)
|
||||
; CHECK64-NEXT: retq
|
||||
entry:
|
||||
store i32 -1, i32* %p
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
define void @zero_64(i64* %p) minsize {
|
||||
; CHECK32-LABEL: zero_64:
|
||||
@ -244,3 +277,20 @@ entry:
|
||||
store volatile i16 -1, i16* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -79,3 +79,72 @@ return: ret void
|
||||
; CHECK: ja
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
define void @dense_optsize(i32 %x) optsize {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 12, label %bb0
|
||||
i32 4, label %bb1
|
||||
i32 16, label %bb1
|
||||
i32 20, label %bb2
|
||||
i32 8, label %bb3
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 1) br label %return
|
||||
bb3: tail call void @g(i32 2) br label %return
|
||||
return: ret void
|
||||
|
||||
; Lowered as branches.
|
||||
; CHECK-LABEL: dense_optsize
|
||||
; CHECK: cmpl $11
|
||||
; CHECK: cmpl $20
|
||||
; CHECK: cmpl $16
|
||||
; CHECK: cmpl $12
|
||||
; CHECK: cmpl $4
|
||||
; CHECK: cmpl $8
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
define void @dense_pgso(i32 %x) !prof !14 {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 12, label %bb0
|
||||
i32 4, label %bb1
|
||||
i32 16, label %bb1
|
||||
i32 20, label %bb2
|
||||
i32 8, label %bb3
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 1) br label %return
|
||||
bb3: tail call void @g(i32 2) br label %return
|
||||
return: ret void
|
||||
|
||||
; Lowered as branches.
|
||||
; CHECK-LABEL: dense_pgso
|
||||
; CHECK: cmpl $11
|
||||
; CHECK: cmpl $20
|
||||
; CHECK: cmpl $16
|
||||
; CHECK: cmpl $12
|
||||
; CHECK: cmpl $4
|
||||
; CHECK: cmpl $8
|
||||
; CHECK: retq
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -473,6 +473,47 @@ return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @one_pgso(i32 %v) nounwind !prof !14 {
|
||||
; CHECK-LABEL: one_pgso:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: je .LBB6_3
|
||||
; CHECK-NEXT: # %bb.1: # %bby
|
||||
; CHECK-NEXT: cmpl $16, %edi
|
||||
; CHECK-NEXT: je .LBB6_4
|
||||
; CHECK-NEXT: # %bb.2: # %bb7
|
||||
; CHECK-NEXT: jmp tail_call_me # TAILCALL
|
||||
; CHECK-NEXT: .LBB6_3: # %bbx
|
||||
; CHECK-NEXT: cmpl $128, %edi
|
||||
; CHECK-NEXT: jne tail_call_me # TAILCALL
|
||||
; CHECK-NEXT: .LBB6_4: # %return
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = icmp eq i32 %v, 0
|
||||
br i1 %0, label %bbx, label %bby
|
||||
|
||||
bby:
|
||||
switch i32 %v, label %bb7 [
|
||||
i32 16, label %return
|
||||
]
|
||||
|
||||
bb7:
|
||||
tail call void @tail_call_me()
|
||||
ret void
|
||||
|
||||
bbx:
|
||||
switch i32 %v, label %bb12 [
|
||||
i32 128, label %return
|
||||
]
|
||||
|
||||
bb12:
|
||||
tail call void @tail_call_me()
|
||||
ret void
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; two - Same as one, but with two instructions in the common
|
||||
; tail instead of one. This is too much to be merged, given
|
||||
; the optsize attribute.
|
||||
@ -484,49 +525,6 @@ define void @two() nounwind optsize {
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je .LBB6_1
|
||||
; CHECK-NEXT: # %bb.2: # %return
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB6_1: # %bb7
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: movl $1, {{.*}}(%rip)
|
||||
entry:
|
||||
%0 = icmp eq i32 undef, 0
|
||||
br i1 %0, label %bbx, label %bby
|
||||
|
||||
bby:
|
||||
switch i32 undef, label %bb7 [
|
||||
i32 16, label %return
|
||||
]
|
||||
|
||||
bb7:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
bbx:
|
||||
switch i32 undef, label %bb12 [
|
||||
i32 128, label %return
|
||||
]
|
||||
|
||||
bb12:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; two_minsize - Same as two, but with minsize instead of optsize.
|
||||
|
||||
define void @two_minsize() nounwind minsize {
|
||||
; CHECK-LABEL: two_minsize:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je .LBB7_1
|
||||
; CHECK-NEXT: # %bb.2: # %return
|
||||
; CHECK-NEXT: retq
|
||||
@ -561,6 +559,90 @@ return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @two_pgso() nounwind !prof !14 {
|
||||
; CHECK-LABEL: two_pgso:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je .LBB8_1
|
||||
; CHECK-NEXT: # %bb.2: # %return
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB8_1: # %bb7
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: movl $1, {{.*}}(%rip)
|
||||
entry:
|
||||
%0 = icmp eq i32 undef, 0
|
||||
br i1 %0, label %bbx, label %bby
|
||||
|
||||
bby:
|
||||
switch i32 undef, label %bb7 [
|
||||
i32 16, label %return
|
||||
]
|
||||
|
||||
bb7:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
bbx:
|
||||
switch i32 undef, label %bb12 [
|
||||
i32 128, label %return
|
||||
]
|
||||
|
||||
bb12:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; two_minsize - Same as two, but with minsize instead of optsize.
|
||||
|
||||
define void @two_minsize() nounwind minsize {
|
||||
; CHECK-LABEL: two_minsize:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je .LBB9_1
|
||||
; CHECK-NEXT: # %bb.2: # %return
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB9_1: # %bb7
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: movl $1, {{.*}}(%rip)
|
||||
entry:
|
||||
%0 = icmp eq i32 undef, 0
|
||||
br i1 %0, label %bbx, label %bby
|
||||
|
||||
bby:
|
||||
switch i32 undef, label %bb7 [
|
||||
i32 16, label %return
|
||||
]
|
||||
|
||||
bb7:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
bbx:
|
||||
switch i32 undef, label %bb12 [
|
||||
i32 128, label %return
|
||||
]
|
||||
|
||||
bb12:
|
||||
store volatile i32 0, i32* @XYZ
|
||||
store volatile i32 1, i32* @XYZ
|
||||
unreachable
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; two_nosize - Same as two, but without the optsize attribute.
|
||||
; Now two instructions are enough to be tail-duplicated.
|
||||
|
||||
@ -568,20 +650,20 @@ define void @two_nosize(i32 %x, i32 %y, i32 %z) nounwind {
|
||||
; CHECK-LABEL: two_nosize:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: je .LBB8_3
|
||||
; CHECK-NEXT: je .LBB10_3
|
||||
; CHECK-NEXT: # %bb.1: # %bby
|
||||
; CHECK-NEXT: testl %esi, %esi
|
||||
; CHECK-NEXT: je .LBB8_4
|
||||
; CHECK-NEXT: je .LBB10_4
|
||||
; CHECK-NEXT: # %bb.2: # %bb7
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: jmp tail_call_me # TAILCALL
|
||||
; CHECK-NEXT: .LBB8_3: # %bbx
|
||||
; CHECK-NEXT: .LBB10_3: # %bbx
|
||||
; CHECK-NEXT: cmpl $-1, %edx
|
||||
; CHECK-NEXT: je .LBB8_4
|
||||
; CHECK-NEXT: je .LBB10_4
|
||||
; CHECK-NEXT: # %bb.5: # %bb12
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: jmp tail_call_me # TAILCALL
|
||||
; CHECK-NEXT: .LBB8_4: # %return
|
||||
; CHECK-NEXT: .LBB10_4: # %return
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = icmp eq i32 %x, 0
|
||||
@ -621,11 +703,11 @@ define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
|
||||
; CHECK-NEXT: movl $1, %eax
|
||||
; CHECK-NEXT: cmovgq %rdi, %rax
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: jle .LBB9_2
|
||||
; CHECK-NEXT: jle .LBB11_2
|
||||
; CHECK-NEXT: # %bb.1: # %bb.nph
|
||||
; CHECK-NEXT: imulq %rdi, %rsi
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: .LBB9_2: # %for.end
|
||||
; CHECK-NEXT: .LBB11_2: # %for.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
|
||||
@ -654,24 +736,24 @@ define void @merge_aborts() {
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB10_5
|
||||
; CHECK-NEXT: je .LBB12_5
|
||||
; CHECK-NEXT: # %bb.1: # %cont1
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB10_5
|
||||
; CHECK-NEXT: je .LBB12_5
|
||||
; CHECK-NEXT: # %bb.2: # %cont2
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB10_5
|
||||
; CHECK-NEXT: je .LBB12_5
|
||||
; CHECK-NEXT: # %bb.3: # %cont3
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB10_5
|
||||
; CHECK-NEXT: je .LBB12_5
|
||||
; CHECK-NEXT: # %bb.4: # %cont4
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB10_5: # %abort1
|
||||
; CHECK-NEXT: .LBB12_5: # %abort1
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: callq abort
|
||||
entry:
|
||||
@ -714,27 +796,27 @@ define void @merge_alternating_aborts() {
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB11_5
|
||||
; CHECK-NEXT: je .LBB13_5
|
||||
; CHECK-NEXT: # %bb.1: # %cont1
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB11_6
|
||||
; CHECK-NEXT: je .LBB13_6
|
||||
; CHECK-NEXT: # %bb.2: # %cont2
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB11_5
|
||||
; CHECK-NEXT: je .LBB13_5
|
||||
; CHECK-NEXT: # %bb.3: # %cont3
|
||||
; CHECK-NEXT: callq qux
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je .LBB11_6
|
||||
; CHECK-NEXT: je .LBB13_6
|
||||
; CHECK-NEXT: # %bb.4: # %cont4
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB11_5: # %abort1
|
||||
; CHECK-NEXT: .LBB13_5: # %abort1
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: callq abort
|
||||
; CHECK-NEXT: .LBB11_6: # %abort2
|
||||
; CHECK-NEXT: .LBB13_6: # %abort2
|
||||
; CHECK-NEXT: callq alt_abort
|
||||
entry:
|
||||
%c1 = call i1 @qux()
|
||||
@ -763,3 +845,20 @@ abort4:
|
||||
cont4:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -49,6 +49,30 @@ no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test64_pgso(i64 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test64_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jb .LBB2_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB2_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i64 %x, 2048
|
||||
%s = icmp eq i64 %t, 0
|
||||
br i1 %s, label %yes, label %no
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
; This test is identical to test64 above with only the destination of the br
|
||||
; reversed. This somehow causes the two functions to get slightly different
|
||||
; initial IR. One has an extra invert of the setcc. This previous caused one
|
||||
@ -60,10 +84,10 @@ define void @test64_2(i64 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
|
||||
; CHECK-NEXT: je .LBB2_2
|
||||
; CHECK-NEXT: je .LBB3_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB2_2: # %no
|
||||
; CHECK-NEXT: .LBB3_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -84,10 +108,34 @@ define void @test64_optsize_2(i64 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB3_2
|
||||
; CHECK-NEXT: jae .LBB4_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB3_2: # %no
|
||||
; CHECK-NEXT: .LBB4_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i64 %x, 2048
|
||||
%s = icmp eq i64 %t, 0
|
||||
br i1 %s, label %no, label %yes
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test64_pgso_2(i64 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test64_pgso_2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB5_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB5_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -108,10 +156,10 @@ define void @test64_3(i64 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jb .LBB4_2
|
||||
; CHECK-NEXT: jb .LBB6_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB4_2: # %no
|
||||
; CHECK-NEXT: .LBB6_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -132,10 +180,34 @@ define void @test64_optsize_3(i64 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jb .LBB5_2
|
||||
; CHECK-NEXT: jb .LBB7_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB5_2: # %no
|
||||
; CHECK-NEXT: .LBB7_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i64 %x, 4294967296
|
||||
%s = icmp eq i64 %t, 0
|
||||
br i1 %s, label %yes, label %no
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test64_pgso_3(i64 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test64_pgso_3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jb .LBB8_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB8_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -156,10 +228,10 @@ define void @test64_4(i64 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jae .LBB6_2
|
||||
; CHECK-NEXT: jae .LBB9_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB6_2: # %no
|
||||
; CHECK-NEXT: .LBB9_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -180,10 +252,34 @@ define void @test64_optsize_4(i64 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jae .LBB7_2
|
||||
; CHECK-NEXT: jae .LBB10_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB7_2: # %no
|
||||
; CHECK-NEXT: .LBB10_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i64 %x, 4294967296
|
||||
%s = icmp eq i64 %t, 0
|
||||
br i1 %s, label %no, label %yes
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test64_pgso_4(i64 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test64_pgso_4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btq $32, %rdi
|
||||
; CHECK-NEXT: jae .LBB11_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB11_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -204,10 +300,10 @@ define void @test32(i32 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
|
||||
; CHECK-NEXT: jne .LBB8_2
|
||||
; CHECK-NEXT: jne .LBB12_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB8_2: # %no
|
||||
; CHECK-NEXT: .LBB12_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -228,10 +324,10 @@ define void @test32_optsize(i32 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jb .LBB9_2
|
||||
; CHECK-NEXT: jb .LBB13_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB9_2: # %no
|
||||
; CHECK-NEXT: .LBB13_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -252,10 +348,10 @@ define void @test32_2(i32 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
|
||||
; CHECK-NEXT: je .LBB10_2
|
||||
; CHECK-NEXT: je .LBB14_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB10_2: # %no
|
||||
; CHECK-NEXT: .LBB14_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -276,10 +372,34 @@ define void @test32_optsize_2(i32 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB11_2
|
||||
; CHECK-NEXT: jae .LBB15_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB11_2: # %no
|
||||
; CHECK-NEXT: .LBB15_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i32 %x, 2048
|
||||
%s = icmp eq i32 %t, 0
|
||||
br i1 %s, label %no, label %yes
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test32_pgso_2(i32 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test32_pgso_2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB16_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB16_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -300,10 +420,10 @@ define void @test16(i16 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
|
||||
; CHECK-NEXT: jne .LBB12_2
|
||||
; CHECK-NEXT: jne .LBB17_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB12_2: # %no
|
||||
; CHECK-NEXT: .LBB17_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -324,10 +444,34 @@ define void @test16_optsize(i16 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jb .LBB13_2
|
||||
; CHECK-NEXT: jb .LBB18_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB13_2: # %no
|
||||
; CHECK-NEXT: .LBB18_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i16 %x, 2048
|
||||
%s = icmp eq i16 %t, 0
|
||||
br i1 %s, label %yes, label %no
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test16_pgso(i16 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test16_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jb .LBB19_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB19_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -348,10 +492,10 @@ define void @test16_2(i16 inreg %x) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: testl $2048, %edi # imm = 0x800
|
||||
; CHECK-NEXT: je .LBB14_2
|
||||
; CHECK-NEXT: je .LBB20_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB14_2: # %no
|
||||
; CHECK-NEXT: .LBB20_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -372,10 +516,34 @@ define void @test16_optsize_2(i16 inreg %x) optsize {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB15_2
|
||||
; CHECK-NEXT: jae .LBB21_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB15_2: # %no
|
||||
; CHECK-NEXT: .LBB21_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
%t = and i16 %x, 2048
|
||||
%s = icmp eq i16 %t, 0
|
||||
br i1 %s, label %no, label %yes
|
||||
|
||||
yes:
|
||||
call void @bar()
|
||||
ret void
|
||||
no:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test16_pgso_2(i16 inreg %x) !prof !14 {
|
||||
; CHECK-LABEL: test16_pgso_2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: btl $11, %edi
|
||||
; CHECK-NEXT: jae .LBB22_2
|
||||
; CHECK-NEXT: # %bb.1: # %yes
|
||||
; CHECK-NEXT: callq bar
|
||||
; CHECK-NEXT: .LBB22_2: # %no
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
@ -512,3 +680,20 @@ define i32 @setcc_is_bit_set(i32 %x) {
|
||||
}
|
||||
|
||||
declare void @bar()
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -2002,6 +2002,56 @@ define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
|
||||
ret <8 x i32> %b
|
||||
}
|
||||
|
||||
; Profile-annotated variant: no optsize attribute, but `!prof !14` gives the
; function an entry count of 0. The shuffle keeps lane 0 of %a and takes lanes
; 1-3 from the zero vector (mask indices 5-7 select the second operand); the
; expected output is a single vmovq.
define <4 x double> @shuffle_v4f64_0zzz_pgso(<4 x double> %a) !prof !14 {
|
||||
; ALL-LABEL: shuffle_v4f64_0zzz_pgso:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; ALL-NEXT: retq
|
||||
%b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||
ret <4 x double> %b
|
||||
}
|
||||
|
||||
; Integer counterpart of the 4 x double case: lane 0 comes from %a, lanes 1-3
; from the zero vector. The function is annotated with `!prof !14` (entry
; count 0) rather than optsize; the expected output is a single vmovq.
define <4 x i64> @shuffle_v4i64_0zzz_pgso(<4 x i64> %a) !prof !14 {
|
||||
; ALL-LABEL: shuffle_v4i64_0zzz_pgso:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; ALL-NEXT: retq
|
||||
%b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||
ret <4 x i64> %b
|
||||
}
|
||||
|
||||
; Keeps lane 0 of %a and fills lanes 1-7 from the zero vector (mask indices
; 9-15 select the second operand). Annotated with `!prof !14` (entry count 0)
; instead of optsize. The two expectation groups differ only in the second
; instruction: vblendps for the AVX1/AVX2 runs, vmovss for the AVX512VL runs.
define <8 x float> @shuffle_v8f32_0zzzzzzz_pgso(<8 x float> %a) !prof !14 {
|
||||
; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
|
||||
; AVX1OR2: # %bb.0:
|
||||
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX1OR2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512VL-NEXT: retq
|
||||
%b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <8 x float> %b
|
||||
}
|
||||
|
||||
; Integer counterpart of the 8 x float case: lane 0 from %a, lanes 1-7 from
; the zero vector. Annotated with `!prof !14` (entry count 0) instead of
; optsize. Expected output: vxorps followed by vblendps (AVX1/AVX2 runs) or
; vmovss (AVX512VL runs).
define <8 x i32> @shuffle_v8i32_0zzzzzzz_pgso(<8 x i32> %a) !prof !14 {
|
||||
; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
|
||||
; AVX1OR2: # %bb.0:
|
||||
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX1OR2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512VL-NEXT: retq
|
||||
%b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <8 x i32> %b
|
||||
}
|
||||
|
||||
define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
|
||||
; ALL-LABEL: unpckh_v4i64:
|
||||
; ALL: # %bb.0:
|
||||
@ -2022,3 +2072,19 @@ define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
|
||||
ret <4 x double> %unpckh
|
||||
}
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -240,3 +240,140 @@ define i64 @xor4_optsize(i64 %x) optsize {
|
||||
%a = xor i64 %x, 9223372036854775808 ; toggle bit 63
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Clears bit 31 of a 64-bit value. The function has no optsize attribute; it
; is annotated with `!prof !14` (entry count 0) instead. Expected output is a
; btrq with immediate 31.
define i64 @and1_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: and1_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btrq $31, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = and i64 %x, 18446744071562067967 ; clear bit 31
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Clears bit 32 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btrq with immediate 32.
define i64 @and2_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: and2_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btrq $32, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = and i64 %x, 18446744069414584319 ; clear bit 32
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Clears bit 62 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btrq with immediate 62.
define i64 @and3_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: and3_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btrq $62, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = and i64 %x, 13835058055282163711 ; clear bit 62
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Clears bit 63 (the top bit) of a 64-bit value in a function annotated with
; `!prof !14` (entry count 0). Expected output is a btrq with immediate 63.
define i64 @and4_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: and4_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btrq $63, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = and i64 %x, 9223372036854775807 ; clear bit 63
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Sets bit 31 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btsq with immediate 31.
define i64 @or1_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: or1_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btsq $31, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = or i64 %x, 2147483648 ; set bit 31
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Sets bit 32 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btsq with immediate 32.
define i64 @or2_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: or2_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btsq $32, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = or i64 %x, 4294967296 ; set bit 32
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Sets bit 62 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btsq with immediate 62.
define i64 @or3_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: or3_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btsq $62, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = or i64 %x, 4611686018427387904 ; set bit 62
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Sets bit 63 (the top bit) of a 64-bit value in a function annotated with
; `!prof !14` (entry count 0). Expected output is a btsq with immediate 63.
define i64 @or4_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: or4_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btsq $63, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = or i64 %x, 9223372036854775808 ; set bit 63
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Toggles bit 31 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btcq with immediate 31.
define i64 @xor1_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: xor1_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btcq $31, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = xor i64 %x, 2147483648 ; toggle bit 31
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Toggles bit 32 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btcq with immediate 32.
define i64 @xor2_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: xor2_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btcq $32, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = xor i64 %x, 4294967296 ; toggle bit 32
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Toggles bit 62 of a 64-bit value in a function annotated with `!prof !14`
; (entry count 0). Expected output is a btcq with immediate 62.
define i64 @xor3_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: xor3_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btcq $62, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = xor i64 %x, 4611686018427387904 ; toggle bit 62
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Toggles bit 63 (the top bit) of a 64-bit value in a function annotated with
; `!prof !14` (entry count 0). Expected output is a btcq with immediate 63.
define i64 @xor4_pgso(i64 %x) !prof !14 {
|
||||
; CHECK-LABEL: xor4_pgso:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: btcq $63, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = xor i64 %x, 9223372036854775808 ; toggle bit 63
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -50,6 +50,19 @@ entry:
|
||||
ret i64 %or
|
||||
}
|
||||
|
||||
; Computes (%a << 11) | (%b >> 53); the two shift amounts add up to 64. The
; function is annotated with `!prof !14` (entry count 0) and has no optsize
; attribute. Expected output is a single shldq with immediate 11.
define i64 @_Z8lshift11mm_pgso(i64 %a, i64 %b) !prof !14 {
|
||||
; CHECK-LABEL: _Z8lshift11mm_pgso:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: shldq $11, %rsi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%shl = shl i64 %a, 11
|
||||
%shr = lshr i64 %b, 53
|
||||
%or = or i64 %shr, %shl
|
||||
ret i64 %or
|
||||
}
|
||||
|
||||
attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
; clang -O2 -c test2.cpp -emit-llvm -S
|
||||
@ -78,3 +91,19 @@ entry:
|
||||
|
||||
attributes #2= { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -25,6 +25,26 @@ while.end: ; preds = %while.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Profile-annotated test function (`!prof !14`, entry count 0). It takes an
; inalloca argument, allocates two 8-byte-aligned stack objects, copies 24
; bytes from %q to %p with the memcpy intrinsic, and then loops: decrement the
; counter in %r, call @g, repeat until the counter reaches zero. The expected
; assembly for this function is listed separately under the `_f_pgso` label.
define void @f_pgso(i8* %p, i8* %q, i32* inalloca nocapture %unused) !prof !14 {
|
||||
entry:
|
||||
%g = alloca %struct.T, align 8
|
||||
%r = alloca i32, align 8
|
||||
store i32 0, i32* %r, align 4
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %p, i8* align 8 %q, i32 24, i1 false)
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%load = load i32, i32* %r, align 4
|
||||
%dec = add nsw i32 %load, -1
|
||||
store i32 %dec, i32* %r, align 4
|
||||
call void @g(%struct.T* %g)
|
||||
%tobool = icmp eq i32 %dec, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
|
||||
|
||||
@ -46,5 +66,38 @@ declare void @g(%struct.T*)
|
||||
; CHECK: testb %[[NE_REG]], %[[NE_REG]]
|
||||
; CHECK: jne
|
||||
|
||||
; CHECK-LABEL: _f_pgso:
|
||||
; CHECK: pushl %ebp
|
||||
; CHECK: movl %esp, %ebp
|
||||
; CHECK: andl $-8, %esp
|
||||
; CHECK-NOT: movl %esp, %esi
|
||||
; CHECK: rep;movsl
|
||||
; CHECK: leal 8(%esp), %esi
|
||||
|
||||
; CHECK: decl (%esp)
|
||||
; CHECK: setne %[[NE_REG:.*]]
|
||||
; CHECK: pushl %esi
|
||||
; CHECK: calll _g
|
||||
; CHECK: addl $4, %esp
|
||||
; CHECK: testb %[[NE_REG]], %[[NE_REG]]
|
||||
; CHECK: jne
|
||||
|
||||
attributes #0 = { nounwind optsize }
|
||||
attributes #1 = { argmemonly nounwind }
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
@ -152,6 +152,30 @@ rare.1:
|
||||
br label %fallthrough
|
||||
}
|
||||
|
||||
; Negative test - profile-guided opt for size. The function has no optsize
; attribute; `!prof !14` gives it an entry count of 0. The address computation
; must stay in the entry block and must not be re-materialized as an i8 GEP
; with offset 40 inside if.then.
define void @test6_pgso(i1 %cond, i64* %base) !prof !14 {
|
||||
; CHECK-LABEL: @test6_pgso
|
||||
entry:
|
||||
; CHECK: %addr = getelementptr
|
||||
%addr = getelementptr inbounds i64, i64* %base, i64 5
|
||||
%casted = bitcast i64* %addr to i32*
|
||||
br i1 %cond, label %if.then, label %fallthrough
|
||||
|
||||
if.then:
|
||||
; CHECK-LABEL: if.then:
|
||||
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
|
||||
%v1 = load i32, i32* %casted, align 4
|
||||
call void @foo(i32 %v1)
|
||||
%cmp = icmp eq i32 %v1, 0
|
||||
br i1 %cmp, label %rare.1, label %fallthrough
|
||||
|
||||
fallthrough:
|
||||
ret void
|
||||
|
||||
rare.1:
|
||||
call void @slowpath(i32 %v1, i32* %casted) cold
|
||||
br label %fallthrough
|
||||
}
|
||||
|
||||
; Make sure sinking two copies of addressing mode into different blocks works
|
||||
; when there are cold paths for each.
|
||||
@ -278,3 +302,20 @@ BB:
|
||||
store i1 false, i1* %G23
|
||||
ret void
|
||||
}
|
||||
|
||||
; Profile summary attached to the module as flag !0. Functions in this file
; that carry `!prof !14` refer to the entry-count record at the end of this
; list. NOTE(review): the detailed-summary triples (!11-!13) are presumably
; (cutoff, min count, num counts) - confirm against the ProfileSummary docs.
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
; Per-function profile record used via `!prof !14`: an entry count of 0.
!14 = !{!"function_entry_count", i64 0}
|
||||
|
Loading…
x
Reference in New Issue
Block a user