[X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening
If we lower a v2i64 shuffle to PSHUFD, we currently clamp undef elements to 0 (elements 0,1 of the v4i32), which can result in the shuffle referencing more elements of the source vector than expected, affecting later shuffle combines and KnownBits/SimplifyDemanded calls. By ensuring we widen the undef mask elements we allow getV4X86ShuffleImm8 to use inline elements as the default, which are more likely to fold.
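As an illustration, a minimal standalone sketch of the old and new widening behaviour follows. It is not the in-tree implementation: widenV2I64Mask is a hypothetical helper name, plain -1 stands in for the SM_SentinelUndef sentinel, and std::array replaces the in-tree mask types.

#include <algorithm>
#include <array>
#include <cstdio>

// Widen a 2-element (v2i64) shuffle mask to a 4-element (v4i32) PSHUFD mask.
// -1 marks an undef element (SM_SentinelUndef in the real lowering code).
static std::array<int, 4> widenV2I64Mask(const std::array<int, 2> &Mask,
                                         bool KeepUndef) {
  std::array<int, 4> Widened;
  for (int i = 0; i != 2; ++i) {
    if (KeepUndef && Mask[i] < 0) {
      // New behaviour: propagate undef, so the PSHUFD immediate can default to
      // in-place elements and later combines demand fewer source elements.
      Widened[2 * i + 0] = -1;
      Widened[2 * i + 1] = -1;
    } else {
      // Old behaviour: clamp undef to source element 0 (v4i32 elements 0,1).
      int M = std::max(Mask[i], 0);
      Widened[2 * i + 0] = M * 2 + 0;
      Widened[2 * i + 1] = M * 2 + 1;
    }
  }
  return Widened;
}

int main() {
  const std::array<int, 2> Mask = {1, -1}; // lane 1, then undef
  for (bool KeepUndef : {false, true}) {
    std::array<int, 4> W = widenV2I64Mask(Mask, KeepUndef);
    std::printf("%s: [%d,%d,%d,%d]\n", KeepUndef ? "new" : "old",
                W[0], W[1], W[2], W[3]);
  }
  return 0;
}

With the mask <1, undef>, the old clamping yields [2,3,0,1], the PSHUFD immediate seen in the old test checks below, while propagating the undef lets the immediate default to in-place elements and yields [2,3,2,3].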
parent d87103aee2
commit 1d52f2e8fe
@ -13723,9 +13723,10 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {
std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
Mask[1] < 0 ? -1 : (Mask[1] * 2),
Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
return DAG.getBitcast(
MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
@ -160,9 +160,9 @@ define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
@ -172,10 +172,10 @@ define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind {
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm8 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vmovdqa (%rsi), %xmm6
; AVX1-NEXT: vmovdqa 16(%rsi), %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm6[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm6[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[1,1,2,3]
@ -454,24 +454,24 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm4
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm15 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm11 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm14 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm13 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
@ -482,10 +482,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
; AVX1-NEXT: vmovdqa (%rsi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rsi), %xmm4
; AVX1-NEXT: vmovdqa 32(%rsi), %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm2, %xmm5, %xmm12
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm5, %xmm6, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[1,1,2,3]
@ -493,10 +493,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
; AVX1-NEXT: vpaddd %xmm6, %xmm7, %xmm9
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm8
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm3, %xmm15, %xmm15
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm7, %xmm11, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
@ -504,10 +504,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
; AVX1-NEXT: vpaddd %xmm2, %xmm14, %xmm14
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm0, %xmm13, %xmm13
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 16-byte Folded Reload
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[3,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
; AVX1-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm6 # 16-byte Folded Reload
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm4[1,1,2,3]
@ -33,7 +33,7 @@ define <8 x float> @sitofp02(<8 x i16> %a) {
; AVX-LABEL: sitofp02:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
@ -5,7 +5,7 @@
define i32 @hadd_16(<16 x i32> %x225) {
; KNL-LABEL: hadd_16:
; KNL: # %bb.0:
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -14,7 +14,7 @@ define i32 @hadd_16(<16 x i32> %x225) {
;
; SKX-LABEL: hadd_16:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -32,7 +32,7 @@ define i32 @hadd_16(<16 x i32> %x225) {
define i32 @hsub_16(<16 x i32> %x225) {
; KNL-LABEL: hsub_16:
; KNL: # %bb.0:
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; KNL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
@ -41,7 +41,7 @@ define i32 @hsub_16(<16 x i32> %x225) {
;
; SKX-LABEL: hsub_16:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
@ -6550,7 +6550,7 @@ define i64 @test_mm512_reduce_add_epi64(<8 x i64> %__W) {
|
||||
; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6563,7 +6563,7 @@ define i64 @test_mm512_reduce_add_epi64(<8 x i64> %__W) {
|
||||
; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
@ -6602,7 +6602,7 @@ define i64 @test_mm512_reduce_mul_epi64(<8 x i64> %__W) {
|
||||
; X86-NEXT: vpsllq $32, %xmm2, %xmm2
|
||||
; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
|
||||
; X86-NEXT: vpsrlq $32, %xmm0, %xmm3
|
||||
@ -6636,7 +6636,7 @@ define i64 @test_mm512_reduce_mul_epi64(<8 x i64> %__W) {
|
||||
; X64-NEXT: vpsllq $32, %xmm2, %xmm2
|
||||
; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X64-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
|
||||
; X64-NEXT: vpsrlq $32, %xmm0, %xmm3
|
||||
@ -6668,7 +6668,7 @@ define i64 @test_mm512_reduce_or_epi64(<8 x i64> %__W) {
|
||||
; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6681,7 +6681,7 @@ define i64 @test_mm512_reduce_or_epi64(<8 x i64> %__W) {
|
||||
; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
@ -6706,7 +6706,7 @@ define i64 @test_mm512_reduce_and_epi64(<8 x i64> %__W) {
|
||||
; X86-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6719,7 +6719,7 @@ define i64 @test_mm512_reduce_and_epi64(<8 x i64> %__W) {
|
||||
; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpand %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
@ -6747,7 +6747,7 @@ define i64 @test_mm512_mask_reduce_add_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6762,7 +6762,7 @@ define i64 @test_mm512_mask_reduce_add_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
@ -6807,7 +6807,7 @@ define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X86-NEXT: vpsllq $32, %xmm2, %xmm2
|
||||
; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
|
||||
; X86-NEXT: vpsrlq $32, %xmm0, %xmm3
|
||||
@ -6844,7 +6844,7 @@ define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X64-NEXT: vpsllq $32, %xmm2, %xmm2
|
||||
; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X64-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
|
||||
; X64-NEXT: vpsrlq $32, %xmm0, %xmm3
|
||||
@ -6882,7 +6882,7 @@ define i64 @test_mm512_mask_reduce_and_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X86-NEXT: vpand %ymm0, %ymm1, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6898,7 +6898,7 @@ define i64 @test_mm512_mask_reduce_and_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X64-NEXT: vpand %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpand %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
@ -6928,7 +6928,7 @@ define i64 @test_mm512_mask_reduce_or_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: vpextrd $1, %xmm0, %edx
|
||||
@ -6943,7 +6943,7 @@ define i64 @test_mm512_mask_reduce_or_epi64(i8 zeroext %__M, <8 x i64> %__W) {
|
||||
; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; X64-NEXT: vmovq %xmm0, %rax
|
||||
; X64-NEXT: vzeroupper
|
||||
|
@ -114,21 +114,21 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
|
||||
define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
|
||||
; SSE2-LABEL: extract2_i32_zext_insert0_i64_zero:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: extract2_i32_zext_insert0_i64_zero:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extract2_i32_zext_insert0_i64_zero:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX-NEXT: retq
|
||||
|
@ -31,7 +31,7 @@ define <8 x i32> @sext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %
|
||||
; SSE41-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm5
|
||||
; SSE41-NEXT: pmovsxwd %xmm5, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -39,9 +39,9 @@ define <8 x i32> @sext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm5, %xmm5
|
||||
; AVX1-NEXT: vblendvps %xmm1, %xmm4, %xmm5, %xmm1
|
||||
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
|
||||
@ -87,7 +87,7 @@ define <8 x i32> @zext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %
|
||||
; SSE41-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm5
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -459,7 +459,7 @@ define void @example24(i16 signext %x, i16 signext %y) nounwind {
|
||||
; SSE41-NEXT: packssdw %xmm3, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovsxwd %xmm3, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm3, dj+4096(%rax)
|
||||
@ -480,9 +480,9 @@ define void @example24(i16 signext %x, i16 signext %y) nounwind {
|
||||
; AVX1-NEXT: movq $-4096, %rax # imm = 0xF000
|
||||
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm3, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm3, %xmm3
|
||||
; AVX1-NEXT: .p2align 4, 0x90
|
||||
; AVX1-NEXT: .LBB6_1: # %vector.body
|
||||
|
@ -696,7 +696,7 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
|
||||
; SSE2-LABEL: _clearupper16xi8b:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pushq %rbx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %r10
|
||||
; SSE2-NEXT: movq %r10, %r8
|
||||
; SSE2-NEXT: shrq $56, %r8
|
||||
@ -878,7 +878,7 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
|
||||
; SSE2-LABEL: _clearupper32xi8b:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pushq %rbx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %r10
|
||||
; SSE2-NEXT: movq %r10, %r8
|
||||
; SSE2-NEXT: shrq $56, %r8
|
||||
|
@ -93,7 +93,7 @@ define i32 @movmskps_sext_v4i64(<4 x i32> %a0) {
|
||||
; AVX1-LABEL: movmskps_sext_v4i64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
||||
@ -116,7 +116,7 @@ define i32 @movmskps_sext_v8i32(<8 x i16> %a0) {
|
||||
; AVX1-LABEL: movmskps_sext_v8i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||
|
@ -2959,7 +2959,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
|
||||
;
|
||||
; SSE41-LABEL: pr38658:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
@ -2984,7 +2984,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
|
||||
;
|
||||
; AVX1-LABEL: pr38658:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
@ -3058,7 +3058,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
|
||||
;
|
||||
; XOP-LABEL: pr38658:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; XOP-NEXT: vpmovsxbw %xmm1, %xmm1
|
||||
; XOP-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -311,7 +311,7 @@ define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
|
||||
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
@ -344,7 +344,7 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -377,7 +377,7 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
|
||||
; SSE41-NEXT: pmulhuw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
|
@ -152,7 +152,7 @@ define <4 x i32> @combine_vec_ashr_trunc_and(<4 x i32> %x, <4 x i64> %y) {
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE-NEXT: psrad %xmm2, %xmm3
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE-NEXT: psrad %xmm4, %xmm5
|
||||
@ -272,7 +272,7 @@ define <4 x i32> @combine_vec_ashr_positive(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE-NEXT: psrld %xmm2, %xmm3
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE-NEXT: psrld %xmm4, %xmm5
|
||||
|
@ -400,7 +400,7 @@ define <4 x i32> @combine_vec_lshr_trunc_and(<4 x i32> %x, <4 x i64> %y) {
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE-NEXT: psrld %xmm2, %xmm3
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE-NEXT: psrld %xmm4, %xmm5
|
||||
|
@ -233,7 +233,7 @@ define <4 x i32> @combine_vec_udiv_by_pow2c(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrld %xmm4, %xmm2
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrld %xmm3, %xmm4
|
||||
@ -249,7 +249,7 @@ define <4 x i32> @combine_vec_udiv_by_pow2c(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrld %xmm2, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm5
|
||||
@ -307,7 +307,7 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2a(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrld %xmm4, %xmm2
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrld %xmm3, %xmm4
|
||||
@ -324,7 +324,7 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2a(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrld %xmm2, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm5
|
||||
@ -384,7 +384,7 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrld %xmm4, %xmm2
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrld %xmm3, %xmm4
|
||||
@ -401,7 +401,7 @@ define <4 x i32> @combine_vec_udiv_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrld %xmm2, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm5
|
||||
|
@ -213,7 +213,7 @@ define <4 x i32> @combine_vec_urem_by_pow2d(<4 x i32> %x, <4 x i32> %y) {
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSE-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE-NEXT: psrld %xmm2, %xmm4
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm2[2,3,3,3,4,5,6,7]
|
||||
; SSE-NEXT: movdqa %xmm3, %xmm6
|
||||
; SSE-NEXT: psrld %xmm5, %xmm6
|
||||
|
@ -565,9 +565,9 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
|
||||
; X86-NEXT: cltd
|
||||
; X86-NEXT: idivl %esi
|
||||
; X86-NEXT: movd %eax, %xmm2
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm3, %eax
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm3, %esi
|
||||
; X86-NEXT: cltd
|
||||
; X86-NEXT: idivl %esi
|
||||
@ -608,9 +608,9 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
|
||||
; X64-NEXT: cltd
|
||||
; X64-NEXT: idivl %ecx
|
||||
; X64-NEXT: movd %eax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %ecx
|
||||
; X64-NEXT: cltd
|
||||
; X64-NEXT: idivl %ecx
|
||||
@ -657,11 +657,11 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; X86-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; X86-NEXT: movd %xmm1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm1, (%esp)
|
||||
; X86-NEXT: calll __divdi3
|
||||
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
@ -707,9 +707,9 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
|
||||
; X64-NEXT: cqto
|
||||
; X64-NEXT: idivq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm3, %rax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm3, %rcx
|
||||
; X64-NEXT: cqto
|
||||
; X64-NEXT: idivq %rcx
|
||||
|
@ -565,9 +565,9 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divl %esi
|
||||
; X86-NEXT: movd %eax, %xmm2
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm3, %eax
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm3, %esi
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divl %esi
|
||||
@ -608,9 +608,9 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divl %ecx
|
||||
; X64-NEXT: movd %eax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %ecx
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divl %ecx
|
||||
@ -657,11 +657,11 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; X86-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; X86-NEXT: movd %xmm1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: movd %xmm1, (%esp)
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
@ -707,9 +707,9 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm3, %rax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm3, %rcx
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
|
@ -314,7 +314,7 @@ define void @extract_i64_1(i64* nocapture %dst, <2 x i64> %foo) nounwind {
|
||||
;
|
||||
; SSE2-X64-LABEL: extract_i64_1:
|
||||
; SSE2-X64: # %bb.0:
|
||||
; SSE2-X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-X64-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-X64-NEXT: retq
|
||||
;
|
||||
|
@ -351,7 +351,7 @@ define i64 @extractelement_v2i64_0(<2 x i64> %a, i256 %i) nounwind {
|
||||
define i64 @extractelement_v2i64_1(<2 x i64> %a, i256 %i) nounwind {
|
||||
; SSE2-LABEL: extractelement_v2i64_1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -371,7 +371,7 @@ define i64 @extractelement_v2i64_1(<2 x i64> %a, i256 %i) nounwind {
|
||||
define i64 @extractelement_v4i64_1(<4 x i64> %a, i256 %i) nounwind {
|
||||
; SSE2-LABEL: extractelement_v4i64_1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -392,7 +392,7 @@ define i64 @extractelement_v4i64_1(<4 x i64> %a, i256 %i) nounwind {
|
||||
define i64 @extractelement_v4i64_3(<4 x i64> %a, i256 %i) nounwind {
|
||||
; SSE2-LABEL: extractelement_v4i64_3:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -10,13 +10,13 @@ define i32 @t(<2 x i64>* %val) nounwind {
|
||||
; X32-SSE2-LABEL: t:
|
||||
; X32-SSE2: # %bb.0:
|
||||
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE2-NEXT: pshufd $78, (%eax), %xmm0 # xmm0 = mem[2,3,0,1]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
|
||||
; X32-SSE2-NEXT: movd %xmm0, %eax
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSSE3-LABEL: t:
|
||||
; X64-SSSE3: # %bb.0:
|
||||
; X64-SSSE3-NEXT: pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]
|
||||
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
|
||||
; X64-SSSE3-NEXT: movd %xmm0, %eax
|
||||
; X64-SSSE3-NEXT: retq
|
||||
;
|
||||
@ -60,13 +60,13 @@ define void @t3(<2 x double>* %a0) {
|
||||
;
|
||||
; X64-SSSE3-LABEL: t3:
|
||||
; X64-SSSE3: # %bb.0: # %bb
|
||||
; X64-SSSE3-NEXT: movsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
|
||||
; X64-SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X64-SSSE3-NEXT: movsd %xmm0, (%rax)
|
||||
; X64-SSSE3-NEXT: retq
|
||||
;
|
||||
; X64-AVX-LABEL: t3:
|
||||
; X64-AVX: # %bb.0: # %bb
|
||||
; X64-AVX-NEXT: vmovsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
|
||||
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
|
||||
; X64-AVX-NEXT: retq
|
||||
bb:
|
||||
@ -139,7 +139,7 @@ define float @t6(<8 x float> *%a0) {
|
||||
; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: andps %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: orps %xmm2, %xmm1
|
||||
@ -151,10 +151,10 @@ define float @t6(<8 x float> *%a0) {
|
||||
;
|
||||
; X64-SSSE3-LABEL: t6:
|
||||
; X64-SSSE3: # %bb.0:
|
||||
; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
|
||||
; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
|
||||
; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
|
||||
; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
|
||||
; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: andps %xmm0, %xmm2
|
||||
; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
|
||||
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
|
||||
@ -162,10 +162,10 @@ define float @t6(<8 x float> *%a0) {
|
||||
;
|
||||
; X64-AVX-LABEL: t6:
|
||||
; X64-AVX: # %bb.0:
|
||||
; X64-AVX-NEXT: vmovss 4(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
|
||||
; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: retq
|
||||
%vecload = load <8 x float>, <8 x float>* %a0, align 32
|
||||
@ -184,7 +184,7 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
|
||||
; X32-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-SSE2-NEXT: cmpltss %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: movss (%eax), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: andps %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: orps %xmm2, %xmm1
|
||||
@ -193,10 +193,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
|
||||
;
|
||||
; X64-SSSE3-LABEL: PR43971:
|
||||
; X64-SSSE3: # %bb.0: # %entry
|
||||
; X64-SSSE3-NEXT: movss 24(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; X64-SSSE3-NEXT: cmpltss %xmm0, %xmm1
|
||||
; X64-SSSE3-NEXT: movss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: andps %xmm1, %xmm2
|
||||
; X64-SSSE3-NEXT: andnps %xmm0, %xmm1
|
||||
; X64-SSSE3-NEXT: orps %xmm2, %xmm1
|
||||
@ -205,10 +205,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
|
||||
;
|
||||
; X64-AVX-LABEL: PR43971:
|
||||
; X64-AVX: # %bb.0: # %entry
|
||||
; X64-AVX-NEXT: vmovss 24(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64-AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1
|
||||
; X64-AVX-NEXT: vmovss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: vmovss %xmm0, (%rsi)
|
||||
; X64-AVX-NEXT: retq
|
||||
@ -231,7 +231,7 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
|
||||
; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X32-SSE2-NEXT: andps %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
|
||||
; X32-SSE2-NEXT: orps %xmm2, %xmm1
|
||||
@ -242,10 +242,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
|
||||
;
|
||||
; X64-SSSE3-LABEL: PR43971_1:
|
||||
; X64-SSSE3: # %bb.0: # %entry
|
||||
; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
|
||||
; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
|
||||
; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
|
||||
; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
|
||||
; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSSE3-NEXT: andps %xmm0, %xmm2
|
||||
; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
|
||||
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
|
||||
@ -253,10 +253,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
|
||||
;
|
||||
; X64-AVX-LABEL: PR43971_1:
|
||||
; X64-AVX: # %bb.0: # %entry
|
||||
; X64-AVX-NEXT: vmovss 4(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
|
||||
; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -17,7 +17,7 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
||||
; LIN-SSE2-NEXT: movd %xmm0, %eax
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm1, %ecx
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm1, %edx
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm0, %esi
|
||||
@ -56,7 +56,7 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
||||
; WIN-SSE2-NEXT: movd %xmm0, %r8d
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm1, %r9d
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm1, %r10d
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm0, %edx
|
||||
@ -141,7 +141,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
|
||||
; LIN-SSE2-NEXT: movd %xmm0, %eax
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm1, %edx
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm1, %esi
|
||||
; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; LIN-SSE2-NEXT: movd %xmm0, %edi
|
||||
@ -184,7 +184,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
|
||||
; WIN-SSE2-NEXT: movd %xmm0, %eax
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm1, %ecx
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm1, %r8d
|
||||
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; WIN-SSE2-NEXT: movd %xmm0, %edx
|
||||
|
@ -127,7 +127,7 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSE3-NEXT: addl %eax, %ecx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
@ -136,7 +136,7 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: addl %eax, %esi
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edi
|
||||
@ -181,7 +181,7 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-LABEL: phadd_d_test2:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %ecx
|
||||
@ -192,7 +192,7 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: addl %eax, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: addl %eax, %esi
|
||||
; SSE3-NEXT: movd %esi, %xmm0
|
||||
@ -243,7 +243,7 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSE3-NEXT: subl %ecx, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
@ -252,7 +252,7 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: subl %esi, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edi
|
||||
@ -297,7 +297,7 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-LABEL: phsub_d_test2:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm2, %ecx
|
||||
@ -306,7 +306,7 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: subl %edx, %ecx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
@ -513,7 +513,7 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm4, %r8d
|
||||
; SSE3-NEXT: addl %ecx, %r8d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm4, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r9d
|
||||
@ -522,7 +522,7 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: addl %edx, %esi
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edi
|
||||
@ -531,7 +531,7 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r10d
|
||||
; SSE3-NEXT: addl %eax, %r10d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %ecx
|
||||
@ -540,7 +540,7 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: addl %eax, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r11d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
@ -819,7 +819,7 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: movd %xmm2, %ecx
|
||||
; SSE-NEXT: subl %ecx, %eax
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: movd %xmm2, %ecx
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE-NEXT: movd %xmm0, %edx
|
||||
@ -830,7 +830,7 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE-NEXT: subl %esi, %edx
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE-NEXT: movd %xmm0, %esi
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: movd %xmm0, %edi
|
||||
; SSE-NEXT: subl %edi, %esi
|
||||
; SSE-NEXT: movd %esi, %xmm0
|
||||
@ -1133,7 +1133,7 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm4, %r8d
|
||||
; SSE3-NEXT: addl %ecx, %r8d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm4, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r9d
|
||||
@ -1142,7 +1142,7 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %esi
|
||||
; SSE3-NEXT: addl %edx, %esi
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edi
|
||||
@ -1151,7 +1151,7 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r10d
|
||||
; SSE3-NEXT: addl %eax, %r10d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %ecx
|
||||
@ -1160,7 +1160,7 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %edx
|
||||
; SSE3-NEXT: addl %eax, %edx
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %r11d
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
|
@ -554,7 +554,7 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrld %xmm2, %xmm5
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
@ -640,7 +640,7 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrld %xmm2, %xmm5
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
@ -677,7 +677,7 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrld %xmm2, %xmm5
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrld %xmm2, %xmm4
|
@ -11,7 +11,7 @@
define i32 @PR37890_v4i32(<4 x i32> %a) {
; SSE2-LABEL: PR37890_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddd %xmm1, %xmm0
@ -20,7 +20,7 @@ define i32 @PR37890_v4i32(<4 x i32> %a) {
;
; SSSE3-SLOW-LABEL: PR37890_v4i32:
; SSSE3-SLOW: # %bb.0:
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
@ -36,7 +36,7 @@ define i32 @PR37890_v4i32(<4 x i32> %a) {
;
; AVX1-SLOW-LABEL: PR37890_v4i32:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -52,7 +52,7 @@ define i32 @PR37890_v4i32(<4 x i32> %a) {
;
; AVX2-LABEL: PR37890_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -70,7 +70,7 @@ define i32 @PR37890_v4i32(<4 x i32> %a) {
define i16 @PR37890_v8i16(<8 x i16> %a) {
; SSE2-LABEL: PR37890_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: paddw %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddw %xmm1, %xmm0
@ -83,7 +83,7 @@ define i16 @PR37890_v8i16(<8 x i16> %a) {
;
; SSSE3-SLOW-LABEL: PR37890_v8i16:
; SSSE3-SLOW: # %bb.0:
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSSE3-SLOW-NEXT: paddw %xmm0, %xmm1
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSSE3-SLOW-NEXT: paddw %xmm1, %xmm0
@ -105,7 +105,7 @@ define i16 @PR37890_v8i16(<8 x i16> %a) {
;
; AVX1-SLOW-LABEL: PR37890_v8i16:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@ -126,7 +126,7 @@ define i16 @PR37890_v8i16(<8 x i16> %a) {
;
; AVX2-LABEL: PR37890_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@ -151,7 +151,7 @@ define i32 @PR37890_v8i32(<8 x i32> %a) {
; SSE2-LABEL: PR37890_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddd %xmm1, %xmm0
@ -161,7 +161,7 @@ define i32 @PR37890_v8i32(<8 x i32> %a) {
; SSSE3-SLOW-LABEL: PR37890_v8i32:
; SSSE3-SLOW: # %bb.0:
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
@ -180,7 +180,7 @@ define i32 @PR37890_v8i32(<8 x i32> %a) {
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -202,7 +202,7 @@ define i32 @PR37890_v8i32(<8 x i32> %a) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -225,7 +225,7 @@ define i16 @PR37890_v16i16(<16 x i16> %a) {
; SSE2-LABEL: PR37890_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: paddw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: paddw %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddw %xmm1, %xmm0
@ -239,7 +239,7 @@ define i16 @PR37890_v16i16(<16 x i16> %a) {
; SSSE3-SLOW-LABEL: PR37890_v16i16:
; SSSE3-SLOW: # %bb.0:
; SSSE3-SLOW-NEXT: paddw %xmm1, %xmm0
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSSE3-SLOW-NEXT: paddw %xmm0, %xmm1
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSSE3-SLOW-NEXT: paddw %xmm1, %xmm0
@ -264,7 +264,7 @@ define i16 @PR37890_v16i16(<16 x i16> %a) {
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@ -291,7 +291,7 @@ define i16 @PR37890_v16i16(<16 x i16> %a) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@ -322,7 +322,7 @@ define i32 @PR37890_v16i32(<16 x i32> %a) {
; SSE2-NEXT: paddd %xmm3, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: paddd %xmm0, %xmm1
@ -334,7 +334,7 @@ define i32 @PR37890_v16i32(<16 x i32> %a) {
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm1
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm1
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
@ -346,7 +346,7 @@ define i32 @PR37890_v16i32(<16 x i32> %a) {
; SSSE3-FAST-NEXT: paddd %xmm3, %xmm1
; SSSE3-FAST-NEXT: paddd %xmm2, %xmm1
; SSSE3-FAST-NEXT: paddd %xmm0, %xmm1
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm0
; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0
; SSSE3-FAST-NEXT: movd %xmm0, %eax
@ -359,7 +359,7 @@ define i32 @PR37890_v16i32(<16 x i32> %a) {
; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm3, %xmm2
; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -385,7 +385,7 @@ define i32 @PR37890_v16i32(<16 x i32> %a) {
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
@ -16,7 +16,7 @@
|
||||
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; X86-SSE42-NEXT: movd %xmm2, %eax
|
||||
@ -49,7 +49,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX: ## %bb.0:
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX-NEXT: vmovd %xmm0, %eax
|
||||
@ -58,7 +58,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -80,7 +80,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; X64-SSE42-NEXT: movq %xmm2, %rax
|
||||
@ -88,7 +88,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vmovq %xmm0, %rax
|
||||
@ -96,7 +96,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vmovq %xmm0, %rax
|
||||
@ -104,7 +104,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX512-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: retq
|
||||
@ -118,7 +118,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -135,7 +135,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
@ -144,7 +144,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-AVX-LABEL: test_reduce_v4i32:
|
||||
; X86-AVX: ## %bb.0:
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -153,7 +153,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -170,7 +170,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
@ -179,7 +179,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-AVX-LABEL: test_reduce_v4i32:
|
||||
; X64-AVX: ## %bb.0:
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -198,7 +198,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -229,7 +229,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -273,7 +273,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -328,7 +328,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -419,7 +419,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -444,7 +444,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -457,7 +457,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vmovd %xmm0, %eax
|
||||
@ -470,7 +470,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vmovd %xmm0, %eax
|
||||
@ -496,7 +496,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -519,7 +519,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -531,7 +531,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vmovq %xmm0, %rax
|
||||
@ -543,7 +543,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vmovq %xmm0, %rax
|
||||
@ -554,7 +554,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -577,7 +577,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
@ -595,7 +595,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v8i32:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
@ -606,7 +606,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-AVX1: ## %bb.0:
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -618,7 +618,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-AVX2: ## %bb.0:
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -633,7 +633,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
@ -651,7 +651,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v8i32:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
@ -662,7 +662,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -674,7 +674,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -686,7 +686,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -710,7 +710,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -758,7 +758,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||
; X64-SSE2-LABEL: test_reduce_v16i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -838,7 +838,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
@ -916,7 +916,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
@ -1072,7 +1072,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
@ -1104,7 +1104,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: movapd %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
@ -1122,7 +1122,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vmovd %xmm0, %eax
|
||||
@ -1137,7 +1137,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vmovd %xmm0, %eax
|
||||
@ -1193,7 +1193,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X64-SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
@ -1223,7 +1223,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: movapd %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
@ -1240,7 +1240,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm1
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vmovq %xmm0, %rax
|
||||
@ -1254,7 +1254,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vmovq %xmm0, %rax
|
||||
@ -1267,7 +1267,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -1303,7 +1303,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pandn %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -1323,7 +1323,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm3, %xmm1
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
@ -1337,7 +1337,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm1, %xmm1
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -1350,7 +1350,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -1375,7 +1375,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pandn %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: por %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -1395,7 +1395,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm3, %xmm1
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
|
||||
@ -1409,7 +1409,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm1, %xmm1
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -1422,7 +1422,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -1436,7 +1436,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
||||
@ -1465,7 +1465,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
@ -1521,7 +1521,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
@ -1622,7 +1622,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pandn %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -1716,7 +1716,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pandn %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: por %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -1837,7 +1837,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1869,7 +1869,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1914,7 +1914,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1946,7 +1946,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1991,7 +1991,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -2047,7 +2047,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -2119,7 +2119,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -2175,7 +2175,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
|
@ -16,7 +16,7 @@
|
||||
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -50,7 +50,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX: ## %bb.0:
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX-NEXT: vmovd %xmm0, %eax
|
||||
@ -59,7 +59,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -81,7 +81,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -90,7 +90,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vmovq %xmm0, %rax
|
||||
@ -98,7 +98,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vmovq %xmm0, %rax
|
||||
@ -106,7 +106,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX512-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: retq
|
||||
@ -120,7 +120,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -137,7 +137,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
|
||||
@ -146,7 +146,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-AVX-LABEL: test_reduce_v4i32:
|
||||
; X86-AVX: ## %bb.0:
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
|
||||
@ -155,7 +155,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -172,7 +172,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
|
||||
@ -181,7 +181,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-AVX-LABEL: test_reduce_v4i32:
|
||||
; X64-AVX: ## %bb.0:
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
|
||||
@ -200,7 +200,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||
@ -231,7 +231,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||
@ -275,7 +275,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -330,7 +330,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
|
||||
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
||||
@ -421,7 +421,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -447,7 +447,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -460,7 +460,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vmovd %xmm0, %eax
|
||||
@ -473,7 +473,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vmovd %xmm0, %eax
|
||||
@ -499,7 +499,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -523,7 +523,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
@ -535,7 +535,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vmovq %xmm0, %rax
|
||||
@ -547,7 +547,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vmovq %xmm0, %rax
|
||||
@ -558,7 +558,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -581,7 +581,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
@ -599,7 +599,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
@ -610,7 +610,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -622,7 +622,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -637,7 +637,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
@ -655,7 +655,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
@ -666,7 +666,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -678,7 +678,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -690,7 +690,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -714,7 +714,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -762,7 +762,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -842,7 +842,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
@ -920,7 +920,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
@ -1076,7 +1076,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pandn %xmm5, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: pxor %xmm1, %xmm4
@ -1108,7 +1108,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE42-NEXT: movapd %xmm3, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@ -1126,7 +1126,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
@ -1141,7 +1141,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
@ -1197,7 +1197,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-SSE2-NEXT: pand %xmm1, %xmm3
; X64-SSE2-NEXT: pandn %xmm5, %xmm1
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
@ -1227,7 +1227,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-SSE42-NEXT: movapd %xmm3, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@ -1244,7 +1244,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
@ -1258,7 +1258,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
@ -1271,7 +1271,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
@ -1307,7 +1307,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-NEXT: pand %xmm1, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm4, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
@ -1327,7 +1327,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE42-NEXT: pminsd %xmm3, %xmm1
; X86-SSE42-NEXT: pminsd %xmm2, %xmm1
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
@ -1341,7 +1341,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsd %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -1354,7 +1354,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -1379,7 +1379,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X64-SSE2-NEXT: pand %xmm1, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm4, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
@ -1399,7 +1399,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X64-SSE42-NEXT: pminsd %xmm3, %xmm1
; X64-SSE42-NEXT: pminsd %xmm2, %xmm1
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
@ -1413,7 +1413,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsd %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -1426,7 +1426,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -1440,7 +1440,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
@ -1469,7 +1469,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
; X86-SSE2-NEXT: pminsw %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
@ -1525,7 +1525,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
; X64-SSE2-NEXT: pminsw %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
@ -1626,7 +1626,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-NEXT: pand %xmm1, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm4, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
@ -1720,7 +1720,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE2-NEXT: pand %xmm1, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm4, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
@ -1841,7 +1841,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -1873,7 +1873,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -1918,7 +1918,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -1950,7 +1950,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -1995,7 +1995,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
@ -2051,7 +2051,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
@ -2123,7 +2123,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
@ -2179,7 +2179,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
|
@ -16,7 +16,7 @@
|
||||
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
|
||||
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm2, %xmm3
|
||||
@ -52,7 +52,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX1: ## %bb.0:
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
|
||||
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
|
||||
@ -65,7 +65,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX2: ## %bb.0:
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -77,7 +77,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -99,7 +99,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
|
||||
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm2, %xmm3
|
||||
@ -110,7 +110,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -121,7 +121,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -132,7 +132,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX512-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: retq
|
||||
@ -146,7 +146,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -169,7 +169,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
@ -178,7 +178,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X86-AVX-LABEL: test_reduce_v4i32:
|
||||
; X86-AVX: ## %bb.0:
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -187,7 +187,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -210,7 +210,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-SSE42-LABEL: test_reduce_v4i32:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
@ -219,7 +219,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; X64-AVX-LABEL: test_reduce_v4i32:
|
||||
; X64-AVX: ## %bb.0:
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -238,7 +238,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
|
||||
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -275,7 +275,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -344,7 +344,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -385,7 +385,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -485,7 +485,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -514,7 +514,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm2, %xmm3
|
||||
@ -533,7 +533,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -551,7 +551,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -579,7 +579,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
@ -606,7 +606,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm2, %xmm3
|
||||
@ -623,7 +623,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -640,7 +640,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -653,7 +653,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -680,7 +680,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X86-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -703,7 +703,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v8i32:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
@ -714,7 +714,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-AVX1: ## %bb.0:
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -726,7 +726,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X86-AVX2: ## %bb.0:
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -745,7 +745,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
|
||||
; X64-SSE2-NEXT: por %xmm0, %xmm4
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -768,7 +768,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v8i32:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
@ -779,7 +779,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -791,7 +791,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -803,7 +803,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -830,7 +830,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -885,7 +885,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -965,7 +965,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -1026,7 +1026,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
|
||||
; X64-SSE2-LABEL: test_reduce_v32i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -1169,7 +1169,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
@ -1210,7 +1210,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: xorpd %xmm5, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm5, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm1, %xmm5
|
||||
@ -1238,7 +1238,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm4, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1260,7 +1260,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1318,7 +1318,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X64-SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
@ -1357,7 +1357,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: xorpd %xmm5, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm5, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm1, %xmm5
|
||||
@ -1383,7 +1383,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm4, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1404,7 +1404,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1419,7 +1419,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -1465,7 +1465,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X86-SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -1490,7 +1490,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE42-NEXT: pmaxud %xmm3, %xmm1
|
||||
; X86-SSE42-NEXT: pmaxud %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
@ -1504,7 +1504,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -1517,7 +1517,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -1552,7 +1552,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm5
|
||||
; X64-SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: por %xmm5, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -1577,7 +1577,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE42-NEXT: pmaxud %xmm3, %xmm1
|
||||
; X64-SSE42-NEXT: pmaxud %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1
|
||||
@ -1591,7 +1591,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -1604,7 +1604,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -1618,7 +1618,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
|
||||
@ -1652,7 +1652,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1717,7 +1717,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||
@ -1810,7 +1810,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-NEXT: pmaxub %xmm3, %xmm1
|
||||
; X86-SSE2-NEXT: pmaxub %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
@ -1879,7 +1879,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X64-SSE2-NEXT: pmaxub %xmm3, %xmm1
|
||||
; X64-SSE2-NEXT: pmaxub %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
@ -1987,7 +1987,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -2025,7 +2025,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -2097,7 +2097,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -2135,7 +2135,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -2207,7 +2207,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -2249,7 +2249,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -2332,7 +2332,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
||||
@ -2374,7 +2374,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
|
@ -16,7 +16,7 @@
|
||||
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X86-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X86-SSE42: ## %bb.0:
|
||||
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
|
||||
; X86-SSE42-NEXT: movdqa %xmm1, %xmm3
|
||||
; X86-SSE42-NEXT: pxor %xmm0, %xmm3
|
||||
@ -53,7 +53,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX1: ## %bb.0:
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
|
||||
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
|
||||
@ -66,7 +66,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X86-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X86-AVX2: ## %bb.0:
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -78,7 +78,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
|
||||
@ -100,7 +100,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
; X64-SSE42-LABEL: test_reduce_v2i64:
|
||||
; X64-SSE42: ## %bb.0:
|
||||
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
|
||||
; X64-SSE42-NEXT: movdqa %xmm1, %xmm3
|
||||
; X64-SSE42-NEXT: pxor %xmm0, %xmm3
|
||||
@ -112,7 +112,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX1-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -123,7 +123,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
|
||||
;
|
||||
; X64-AVX2-LABEL: test_reduce_v2i64:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
@ -134,7 +134,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
@ -148,7 +148,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
@ -171,7 +171,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
@ -180,7 +180,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -189,7 +189,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
@ -212,7 +212,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
@ -221,7 +221,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -240,7 +240,7 @@ define i32 @test_reduce_v4i32(<4 x i32> %a0) {
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
@ -271,7 +271,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
@ -315,7 +315,7 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
@ -350,7 +350,7 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
@ -421,7 +421,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
@ -451,7 +451,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm2, %xmm3
@ -471,7 +471,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vxorps %xmm1, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
@ -489,7 +489,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@ -517,7 +517,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
@ -545,7 +545,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm2, %xmm3
@ -563,7 +563,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
@ -580,7 +580,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@ -593,7 +593,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
@ -620,7 +620,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
@ -643,7 +643,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
@ -654,7 +654,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -666,7 +666,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -685,7 +685,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
@ -708,7 +708,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
@ -719,7 +719,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -731,7 +731,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -743,7 +743,7 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
@ -770,7 +770,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -816,7 +816,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
@ -885,7 +885,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
@ -937,7 +937,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE2-LABEL: test_reduce_v32i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
@ -1069,7 +1069,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pandn %xmm5, %xmm0
|
||||
; X86-SSE2-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
@ -1111,7 +1111,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-SSE42-NEXT: xorpd %xmm4, %xmm0
|
||||
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
|
||||
; X86-SSE42-NEXT: pxor %xmm1, %xmm4
|
||||
@ -1140,7 +1140,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
|
||||
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
|
||||
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
|
||||
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1162,7 +1162,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||
@ -1220,7 +1220,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm3
|
||||
; X64-SSE2-NEXT: pandn %xmm5, %xmm1
|
||||
; X64-SSE2-NEXT: por %xmm3, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
@ -1260,7 +1260,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-SSE42-NEXT: xorpd %xmm4, %xmm0
|
||||
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
|
||||
; X64-SSE42-NEXT: pxor %xmm1, %xmm4
|
||||
@ -1287,7 +1287,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
|
||||
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
|
||||
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
@ -1308,7 +1308,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
|
||||
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
|
||||
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||
@ -1323,7 +1323,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
|
||||
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; X64-AVX512-NEXT: vzeroupper
|
||||
@ -1369,7 +1369,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm3
|
||||
; X86-SSE2-NEXT: pandn %xmm6, %xmm1
|
||||
; X86-SSE2-NEXT: por %xmm3, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -1394,7 +1394,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-SSE42-NEXT: pminud %xmm3, %xmm1
|
||||
; X86-SSE42-NEXT: pminud %xmm2, %xmm1
|
||||
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
|
||||
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
|
||||
@ -1408,7 +1408,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
|
||||
; X86-AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
|
||||
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
@ -1421,7 +1421,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
@ -1456,7 +1456,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE2-NEXT: pand %xmm1, %xmm3
|
||||
; X64-SSE2-NEXT: pandn %xmm6, %xmm1
|
||||
; X64-SSE2-NEXT: por %xmm3, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
@ -1481,7 +1481,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-SSE42-NEXT: pminud %xmm3, %xmm1
|
||||
; X64-SSE42-NEXT: pminud %xmm2, %xmm1
|
||||
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
|
||||
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
|
||||
@ -1495,7 +1495,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
|
||||
; X64-AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
|
||||
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
@ -1508,7 +1508,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
@ -1522,7 +1522,7 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
|
||||
; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
|
||||
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
|
||||
@ -1556,7 +1556,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||
; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||
@ -1612,7 +1612,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm2
|
||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||
; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||
@ -1694,7 +1694,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-NEXT: pminub %xmm3, %xmm1
|
||||
; X86-SSE2-NEXT: pminub %xmm2, %xmm1
|
||||
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
@ -1754,7 +1754,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
; X64-SSE2-NEXT: pminub %xmm3, %xmm1
|
||||
; X64-SSE2-NEXT: pminub %xmm2, %xmm1
|
||||
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
@ -1851,7 +1851,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
|
||||
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -1883,7 +1883,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -1928,7 +1928,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -1960,7 +1960,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
@ -2005,7 +2005,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
@ -2041,7 +2041,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
@ -2093,7 +2093,7 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
|
||||
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X86-SSE2: ## %bb.0:
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
@ -2129,7 +2129,7 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
|
||||
; X64-SSE2: ## %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
|
||||
|
@ -77,7 +77,7 @@ define <1 x i128> @add_v1i128(<1 x i128> %x, <1 x i128> %y) nounwind {
|
||||
; X64-NEXT: movq %rax, %xmm0
|
||||
; X64-NEXT: movq %rsi, %xmm1
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rdx
|
||||
; X64-NEXT: addq $1, %rax
|
||||
; X64-NEXT: adcq $0, %rdx
|
||||
|
@ -16,7 +16,7 @@ define { i64, i64 } @foo(i64 %0, i64 %1) {
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; CHECK-NEXT: movq %xmm0, %rdx
|
||||
; CHECK-NEXT: retq
|
||||
%3 = zext i64 %1 to i128
|
||||
|
@ -116,14 +116,14 @@ define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounw
|
||||
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X32-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
|
||||
|
@ -252,7 +252,7 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
|
||||
; X86-LABEL: signbits_sext_shuffle_sitofp:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X86-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X86-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
|
||||
@ -264,7 +264,7 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
|
||||
; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
|
||||
; X64-AVX1: # %bb.0:
|
||||
; X64-AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X64-AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
|
||||
@ -478,7 +478,7 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
|
||||
; X64-AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
|
||||
; X64-AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm6
|
||||
; X64-AVX1-NEXT: vblendvpd %xmm6, %xmm4, %xmm5, %xmm4
|
||||
|
@ -25,7 +25,7 @@ define i32 @_Z10test_shortPsS_i_128(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: cmpq %rcx, %rax
|
||||
; SSE2-NEXT: jne .LBB0_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -48,7 +48,7 @@ define i32 @_Z10test_shortPsS_i_128(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX-NEXT: cmpq %rcx, %rax
|
||||
; AVX-NEXT: jne .LBB0_1
|
||||
; AVX-NEXT: # %bb.2: # %middle.block
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -103,7 +103,7 @@ define i32 @_Z10test_shortPsS_i_256(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: jne .LBB1_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -128,7 +128,7 @@ define i32 @_Z10test_shortPsS_i_256(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX1-NEXT: # %bb.2: # %middle.block
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -153,7 +153,7 @@ define i32 @_Z10test_shortPsS_i_256(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX256-NEXT: # %bb.2: # %middle.block
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -218,7 +218,7 @@ define i32 @_Z10test_shortPsS_i_512(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -251,7 +251,7 @@ define i32 @_Z10test_shortPsS_i_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -278,7 +278,7 @@ define i32 @_Z10test_shortPsS_i_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -305,7 +305,7 @@ define i32 @_Z10test_shortPsS_i_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -386,7 +386,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -433,7 +433,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -466,7 +466,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -498,7 +498,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -527,7 +527,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -593,7 +593,7 @@ define i32 @_Z9test_charPcS_i_128(i8* nocapture readonly, i8* nocapture readonly
|
||||
; SSE2-NEXT: cmpq %rcx, %rax
|
||||
; SSE2-NEXT: jne .LBB4_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -616,7 +616,7 @@ define i32 @_Z9test_charPcS_i_128(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX-NEXT: cmpq %rcx, %rax
|
||||
; AVX-NEXT: jne .LBB4_1
|
||||
; AVX-NEXT: # %bb.2: # %middle.block
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -675,7 +675,7 @@ define i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly
|
||||
; SSE2-NEXT: jne .LBB5_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -701,7 +701,7 @@ define i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX1-NEXT: # %bb.2: # %middle.block
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -727,7 +727,7 @@ define i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX256-NEXT: # %bb.2: # %middle.block
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -798,7 +798,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -833,7 +833,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -861,7 +861,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -889,7 +889,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -982,7 +982,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -1033,7 +1033,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1068,7 +1068,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1102,7 +1102,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1132,7 +1132,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1195,7 +1195,7 @@ define i32 @test_unsigned_short_128(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: cmpq %rcx, %rax
|
||||
; SSE2-NEXT: jne .LBB8_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -1218,7 +1218,7 @@ define i32 @test_unsigned_short_128(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX-NEXT: cmpq %rcx, %rax
|
||||
; AVX-NEXT: jne .LBB8_1
|
||||
; AVX-NEXT: # %bb.2: # %middle.block
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1279,7 +1279,7 @@ define i32 @test_unsigned_short_256(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: jne .LBB9_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -1310,7 +1310,7 @@ define i32 @test_unsigned_short_256(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX1-NEXT: # %bb.2: # %middle.block
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1336,7 +1336,7 @@ define i32 @test_unsigned_short_256(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX256-NEXT: # %bb.2: # %middle.block
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1414,7 +1414,7 @@ define i32 @test_unsigned_short_512(i16* nocapture readonly, i16* nocapture read
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -1459,7 +1459,7 @@ define i32 @test_unsigned_short_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1491,7 +1491,7 @@ define i32 @test_unsigned_short_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1519,7 +1519,7 @@ define i32 @test_unsigned_short_512(i16* nocapture readonly, i16* nocapture read
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1627,7 +1627,7 @@ define i32 @test_unsigned_short_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; SSE2-NEXT: paddd %xmm5, %xmm9
|
||||
; SSE2-NEXT: paddd %xmm10, %xmm9
|
||||
; SSE2-NEXT: paddd %xmm8, %xmm9
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm9, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -1700,7 +1700,7 @@ define i32 @test_unsigned_short_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm9, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm8, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1744,7 +1744,7 @@ define i32 @test_unsigned_short_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1778,7 +1778,7 @@ define i32 @test_unsigned_short_1024(i16* nocapture readonly, i16* nocapture rea
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2074,7 +2074,7 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
|
||||
;
|
||||
; AVX1-LABEL: pmaddwd_negative2:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
|
||||
@ -2647,7 +2647,7 @@ define i32 @madd_double_reduction(<8 x i16>* %arg, <8 x i16>* %arg1, <8 x i16>*
|
||||
; SSE2-NEXT: movdqu (%rcx), %xmm2
|
||||
; SSE2-NEXT: pmaddwd %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -2661,7 +2661,7 @@ define i32 @madd_double_reduction(<8 x i16>* %arg, <8 x i16>* %arg1, <8 x i16>*
|
||||
; AVX-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; AVX-NEXT: vpmaddwd (%rcx), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2708,7 +2708,7 @@ define i32 @madd_quad_reduction(<8 x i16>* %arg, <8 x i16>* %arg1, <8 x i16>* %a
|
||||
; SSE2-NEXT: pmaddwd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -2730,7 +2730,7 @@ define i32 @madd_quad_reduction(<8 x i16>* %arg, <8 x i16>* %arg1, <8 x i16>* %a
|
||||
; AVX-NEXT: vpmaddwd (%r10), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2798,13 +2798,13 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
|
||||
; SSE2-NEXT: jne .LBB33_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movd %xmm2, %ecx
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -2839,14 +2839,14 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
|
||||
; AVX1-NEXT: # %bb.2: # %middle.block
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovd %xmm1, %ecx
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2874,14 +2874,14 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
|
||||
; AVX256-NEXT: # %bb.2: # %middle.block
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX256-NEXT: vmovd %xmm1, %ecx
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2953,7 +2953,7 @@ define i32 @sum_of_square_differences(i8* %a, i8* %b, i32 %n) {
|
||||
; SSE2-NEXT: jne .LBB34_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -2980,7 +2980,7 @@ define i32 @sum_of_square_differences(i8* %a, i8* %b, i32 %n) {
|
||||
; AVX1-NEXT: # %bb.2: # %middle.block
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -3007,7 +3007,7 @@ define i32 @sum_of_square_differences(i8* %a, i8* %b, i32 %n) {
|
||||
; AVX256-NEXT: # %bb.2: # %middle.block
|
||||
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
|
@ -2478,7 +2478,7 @@ define void @compressstore_v2i64_v2i1(i64* %base, <2 x i64> %V, <2 x i1> %mask)
|
||||
; SSE2-NEXT: testb $2, %al
|
||||
; SSE2-NEXT: je LBB7_4
|
||||
; SSE2-NEXT: LBB7_3: ## %cond.store1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -2574,7 +2574,7 @@ define void @compressstore_v4i64_v4i1(i64* %base, <4 x i64> %V, <4 x i1> %mask)
|
||||
; SSE2-NEXT: testb $2, %al
|
||||
; SSE2-NEXT: je LBB8_4
|
||||
; SSE2-NEXT: LBB8_3: ## %cond.store1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: addq $8, %rdi
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
@ -2585,7 +2585,7 @@ define void @compressstore_v4i64_v4i1(i64* %base, <4 x i64> %V, <4 x i1> %mask)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je LBB8_8
|
||||
; SSE2-NEXT: LBB8_7: ## %cond.store7
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -2762,7 +2762,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE2-NEXT: testb $2, %al
|
||||
; SSE2-NEXT: je LBB9_4
|
||||
; SSE2-NEXT: LBB9_3: ## %cond.store1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: addq $8, %rdi
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
@ -2773,7 +2773,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je LBB9_8
|
||||
; SSE2-NEXT: LBB9_7: ## %cond.store7
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: addq $8, %rdi
|
||||
; SSE2-NEXT: testb $16, %al
|
||||
@ -2784,7 +2784,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE2-NEXT: testb $32, %al
|
||||
; SSE2-NEXT: je LBB9_12
|
||||
; SSE2-NEXT: LBB9_11: ## %cond.store13
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: addq $8, %rdi
|
||||
; SSE2-NEXT: testb $64, %al
|
||||
@ -2795,7 +2795,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE2-NEXT: testb $-128, %al
|
||||
; SSE2-NEXT: je LBB9_16
|
||||
; SSE2-NEXT: LBB9_15: ## %cond.store19
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3068,7 +3068,7 @@ define void @compressstore_v4i32_v4i32(i32* %base, <4 x i32> %V, <4 x i32> %trig
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je LBB10_6
|
||||
; SSE2-NEXT: LBB10_5: ## %cond.store4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE2-NEXT: addq $4, %rdi
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
|
@ -34,23 +34,23 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .LBB0_1: # %cond.load
|
||||
; SSE-NEXT: movq %xmm0, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5,6,7]
|
||||
; SSE-NEXT: testb $2, %al
|
||||
; SSE-NEXT: je .LBB0_4
|
||||
; SSE-NEXT: .LBB0_3: # %cond.load1
|
||||
; SSE-NEXT: pextrq $1, %xmm0, %rcx
|
||||
; SSE-NEXT: insertps $16, (%rcx), %xmm3 # xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; SSE-NEXT: testb $4, %al
|
||||
; SSE-NEXT: je .LBB0_6
|
||||
; SSE-NEXT: .LBB0_5: # %cond.load4
|
||||
; SSE-NEXT: movq %xmm1, %rcx
|
||||
; SSE-NEXT: insertps $32, (%rcx), %xmm3 # xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; SSE-NEXT: testb $8, %al
|
||||
; SSE-NEXT: je .LBB0_8
|
||||
; SSE-NEXT: .LBB0_7: # %cond.load7
|
||||
; SSE-NEXT: pextrq $1, %xmm1, %rax
|
||||
; SSE-NEXT: insertps $48, (%rax), %xmm3 # xmm3 = xmm3[0,1,2],mem[0]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
|
||||
; SSE-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -63,14 +63,14 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
|
||||
; AVX1-NEXT: je .LBB0_2
|
||||
; AVX1-NEXT: # %bb.1: # %cond.load
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX1-NEXT: .LBB0_2: # %else
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB0_4
|
||||
; AVX1-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: .LBB0_4: # %else2
|
||||
; AVX1-NEXT: testb $4, %al
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
@ -84,12 +84,12 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: .LBB0_5: # %cond.load4
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: testb $8, %al
|
||||
; AVX1-NEXT: je .LBB0_8
|
||||
; AVX1-NEXT: .LBB0_7: # %cond.load7
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX1-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -103,14 +103,14 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
|
||||
; AVX2-NEXT: je .LBB0_2
|
||||
; AVX2-NEXT: # %bb.1: # %cond.load
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX2-NEXT: .LBB0_2: # %else
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB0_4
|
||||
; AVX2-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: .LBB0_4: # %else2
|
||||
; AVX2-NEXT: testb $4, %al
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
@ -124,12 +124,12 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-NEXT: .LBB0_5: # %cond.load4
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: testb $8, %al
|
||||
; AVX2-NEXT: je .LBB0_8
|
||||
; AVX2-NEXT: .LBB0_7: # %cond.load7
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX2-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -159,7 +159,7 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; SSE-NEXT: pmovsxdq %xmm0, %xmm4
|
||||
; SSE-NEXT: psllq $2, %xmm4
|
||||
; SSE-NEXT: paddq %xmm3, %xmm4
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; SSE-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm5
|
||||
@ -168,7 +168,7 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; SSE-NEXT: je .LBB1_2
|
||||
; SSE-NEXT: # %bb.1: # %cond.load
|
||||
; SSE-NEXT: movq %xmm4, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
|
||||
; SSE-NEXT: .LBB1_2: # %else
|
||||
; SSE-NEXT: psllq $2, %xmm0
|
||||
@ -176,7 +176,7 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; SSE-NEXT: je .LBB1_4
|
||||
; SSE-NEXT: # %bb.3: # %cond.load1
|
||||
; SSE-NEXT: pextrq $1, %xmm4, %rcx
|
||||
; SSE-NEXT: insertps $16, (%rcx), %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; SSE-NEXT: .LBB1_4: # %else2
|
||||
; SSE-NEXT: paddq %xmm0, %xmm3
|
||||
; SSE-NEXT: testb $4, %al
|
||||
@ -189,12 +189,12 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .LBB1_5: # %cond.load4
|
||||
; SSE-NEXT: movq %xmm3, %rcx
|
||||
; SSE-NEXT: insertps $32, (%rcx), %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; SSE-NEXT: testb $8, %al
|
||||
; SSE-NEXT: je .LBB1_8
|
||||
; SSE-NEXT: .LBB1_7: # %cond.load7
|
||||
; SSE-NEXT: pextrq $1, %xmm3, %rax
|
||||
; SSE-NEXT: insertps $48, (%rax), %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -202,7 +202,7 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsllq $2, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpaddq %xmm4, %xmm3, %xmm4
|
||||
@ -217,14 +217,14 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; AVX1-NEXT: je .LBB1_2
|
||||
; AVX1-NEXT: # %bb.1: # %cond.load
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX1-NEXT: .LBB1_2: # %else
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB1_4
|
||||
; AVX1-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: .LBB1_4: # %else2
|
||||
; AVX1-NEXT: testb $4, %al
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
@ -238,12 +238,12 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: .LBB1_5: # %cond.load4
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: testb $8, %al
|
||||
; AVX1-NEXT: je .LBB1_8
|
||||
; AVX1-NEXT: .LBB1_7: # %cond.load7
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX1-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -262,14 +262,14 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; AVX2-NEXT: je .LBB1_2
|
||||
; AVX2-NEXT: # %bb.1: # %cond.load
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX2-NEXT: .LBB1_2: # %else
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB1_4
|
||||
; AVX2-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: .LBB1_4: # %else2
|
||||
; AVX2-NEXT: testb $4, %al
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
@ -283,12 +283,12 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-NEXT: .LBB1_5: # %cond.load4
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: testb $8, %al
|
||||
; AVX2-NEXT: je .LBB1_8
|
||||
; AVX2-NEXT: .LBB1_7: # %cond.load7
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX2-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -328,7 +328,7 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; SSE-NEXT: je .LBB2_2
|
||||
; SSE-NEXT: # %bb.1: # %cond.load
|
||||
; SSE-NEXT: movq %xmm0, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5,6,7]
|
||||
; SSE-NEXT: .LBB2_2: # %else
|
||||
; SSE-NEXT: psllq $2, %xmm1
|
||||
@ -336,7 +336,7 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; SSE-NEXT: je .LBB2_4
|
||||
; SSE-NEXT: # %bb.3: # %cond.load1
|
||||
; SSE-NEXT: pextrq $1, %xmm0, %rcx
|
||||
; SSE-NEXT: insertps $16, (%rcx), %xmm3 # xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; SSE-NEXT: .LBB2_4: # %else2
|
||||
; SSE-NEXT: paddq %xmm1, %xmm4
|
||||
; SSE-NEXT: testb $4, %al
|
||||
@ -349,12 +349,12 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .LBB2_5: # %cond.load4
|
||||
; SSE-NEXT: movq %xmm4, %rcx
|
||||
; SSE-NEXT: insertps $32, (%rcx), %xmm3 # xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; SSE-NEXT: testb $8, %al
|
||||
; SSE-NEXT: je .LBB2_8
|
||||
; SSE-NEXT: .LBB2_7: # %cond.load7
|
||||
; SSE-NEXT: pextrq $1, %xmm4, %rax
|
||||
; SSE-NEXT: insertps $48, (%rax), %xmm3 # xmm3 = xmm3[0,1,2],mem[0]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
|
||||
; SSE-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -375,14 +375,14 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; AVX1-NEXT: je .LBB2_2
|
||||
; AVX1-NEXT: # %bb.1: # %cond.load
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX1-NEXT: .LBB2_2: # %else
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB2_4
|
||||
; AVX1-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX1-NEXT: .LBB2_4: # %else2
|
||||
; AVX1-NEXT: testb $4, %al
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
@ -396,12 +396,12 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: .LBB2_5: # %cond.load4
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX1-NEXT: testb $8, %al
|
||||
; AVX1-NEXT: je .LBB2_8
|
||||
; AVX1-NEXT: .LBB2_7: # %cond.load7
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX1-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -419,14 +419,14 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; AVX2-NEXT: je .LBB2_2
|
||||
; AVX2-NEXT: # %bb.1: # %cond.load
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vmovss (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
|
||||
; AVX2-NEXT: .LBB2_2: # %else
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB2_4
|
||||
; AVX2-NEXT: # %bb.3: # %cond.load1
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $16, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; AVX2-NEXT: .LBB2_4: # %else2
|
||||
; AVX2-NEXT: testb $4, %al
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
@ -440,12 +440,12 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-NEXT: .LBB2_5: # %cond.load4
|
||||
; AVX2-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX2-NEXT: vinsertps $32, (%rcx), %xmm2, %xmm2 # xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; AVX2-NEXT: testb $8, %al
|
||||
; AVX2-NEXT: je .LBB2_8
|
||||
; AVX2-NEXT: .LBB2_7: # %cond.load7
|
||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX2-NEXT: vinsertps $48, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; AVX2-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -480,7 +480,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movq %rdi, %xmm6
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,1,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; SSE-NEXT: paddq %xmm8, %xmm0
|
||||
; SSE-NEXT: pxor %xmm6, %xmm6
|
||||
@ -513,7 +513,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; SSE-NEXT: pextrq $1, %xmm4, %rcx
|
||||
; SSE-NEXT: pinsrb $3, (%rcx), %xmm5
|
||||
; SSE-NEXT: .LBB3_8: # %else8
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm8, %xmm0
|
||||
; SSE-NEXT: testb $16, %al
|
||||
; SSE-NEXT: je .LBB3_10
|
||||
@ -542,7 +542,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; SSE-NEXT: pextrq $1, %xmm1, %rcx
|
||||
; SSE-NEXT: pinsrb $7, (%rcx), %xmm5
|
||||
; SSE-NEXT: .LBB3_16: # %else20
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm8, %xmm0
|
||||
; SSE-NEXT: testl $256, %eax # imm = 0x100
|
||||
; SSE-NEXT: je .LBB3_18
|
||||
@ -571,7 +571,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; SSE-NEXT: pextrq $1, %xmm1, %rcx
|
||||
; SSE-NEXT: pinsrb $11, (%rcx), %xmm5
|
||||
; SSE-NEXT: .LBB3_24: # %else32
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm8, %xmm0
|
||||
; SSE-NEXT: testl $4096, %eax # imm = 0x1000
|
||||
; SSE-NEXT: je .LBB3_26
|
||||
@ -611,7 +611,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpaddq %xmm6, %xmm4, %xmm6
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
@ -626,7 +626,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vpinsrb $0, (%rcx), %xmm3, %xmm3
|
||||
; AVX1-NEXT: .LBB3_2: # %else
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm5[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm6
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB3_4
|
||||
@ -657,7 +657,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vpinsrb $4, (%rcx), %xmm3, %xmm3
|
||||
; AVX1-NEXT: .LBB3_10: # %else11
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm6
|
||||
; AVX1-NEXT: testb $32, %al
|
||||
; AVX1-NEXT: je .LBB3_12
|
||||
@ -689,7 +689,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: vpinsrb $8, (%rcx), %xmm3, %xmm3
|
||||
; AVX1-NEXT: .LBB3_18: # %else23
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
|
||||
; AVX1-NEXT: testl $512, %eax # imm = 0x200
|
||||
; AVX1-NEXT: je .LBB3_20
|
||||
@ -1040,7 +1040,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; SSE-NEXT: je .LBB4_1
|
||||
; SSE-NEXT: # %bb.2: # %cond.load
|
||||
; SSE-NEXT: movq %xmm5, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: testb $2, %al
|
||||
; SSE-NEXT: jne .LBB4_4
|
||||
; SSE-NEXT: jmp .LBB4_5
|
||||
@ -1105,7 +1105,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; SSE-NEXT: je .LBB4_19
|
||||
; SSE-NEXT: # %bb.20: # %cond.load23
|
||||
; SSE-NEXT: movq %xmm4, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm5 # xmm5 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: testb $2, %al
|
||||
; SSE-NEXT: jne .LBB4_22
|
||||
; SSE-NEXT: jmp .LBB4_23
|
||||
@ -1174,7 +1174,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; SSE-NEXT: je .LBB4_37
|
||||
; SSE-NEXT: # %bb.38: # %cond.load72
|
||||
; SSE-NEXT: movq %xmm4, %rcx
|
||||
; SSE-NEXT: movd (%rcx), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: testb $2, %al
|
||||
; SSE-NEXT: jne .LBB4_40
|
||||
; SSE-NEXT: jmp .LBB4_41
|
||||
@ -1260,7 +1260,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX1-NEXT: je .LBB4_2
|
||||
; AVX1-NEXT: # %bb.1: # %cond.load
|
||||
; AVX1-NEXT: vmovq %xmm3, %rdx
|
||||
; AVX1-NEXT: vmovd (%rdx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: .LBB4_2: # %else
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB4_4
|
||||
@ -1334,7 +1334,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX1-NEXT: je .LBB4_18
|
||||
; AVX1-NEXT: # %bb.17: # %cond.load23
|
||||
; AVX1-NEXT: vmovq %xmm7, %rcx
|
||||
; AVX1-NEXT: vmovd (%rcx), %xmm4 # xmm4 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: .LBB4_18: # %else27
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB4_20
|
||||
@ -1405,7 +1405,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX1-NEXT: je .LBB4_34
|
||||
; AVX1-NEXT: # %bb.33: # %cond.load72
|
||||
; AVX1-NEXT: vmovq %xmm7, %rcx
|
||||
; AVX1-NEXT: vmovd (%rcx), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: .LBB4_34: # %else76
|
||||
; AVX1-NEXT: testb $2, %al
|
||||
; AVX1-NEXT: je .LBB4_36
|
||||
@ -1491,7 +1491,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX2-NEXT: je .LBB4_2
|
||||
; AVX2-NEXT: # %bb.1: # %cond.load
|
||||
; AVX2-NEXT: vmovq %xmm3, %rcx
|
||||
; AVX2-NEXT: vmovd (%rcx), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: .LBB4_2: # %else
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB4_4
|
||||
@ -1534,7 +1534,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX2-NEXT: je .LBB4_18
|
||||
; AVX2-NEXT: # %bb.17: # %cond.load23
|
||||
; AVX2-NEXT: vmovq %xmm3, %rcx
|
||||
; AVX2-NEXT: vmovd (%rcx), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: .LBB4_18: # %else27
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB4_20
|
||||
@ -1678,7 +1678,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; AVX2-NEXT: jmp .LBB4_32
|
||||
; AVX2-NEXT: .LBB4_33: # %cond.load72
|
||||
; AVX2-NEXT: vmovq %xmm3, %rcx
|
||||
; AVX2-NEXT: vmovd (%rcx), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: testb $2, %al
|
||||
; AVX2-NEXT: je .LBB4_36
|
||||
; AVX2-NEXT: .LBB4_35: # %cond.load78
|
||||
|
@ -153,7 +153,7 @@ define <4 x double> @load_v4f64_v4i32(<4 x i32> %trigger, <4 x double>* %addr, <
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
|
||||
@ -233,7 +233,7 @@ define <4 x double> @load_v4f64_v4i32_zero(<4 x i32> %trigger, <4 x double>* %ad
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
|
||||
@ -458,12 +458,12 @@ define <8 x double> @load_v8f64_v8i16(<8 x i16> %trigger, <8 x double>* %addr, <
|
||||
;
|
||||
; AVX1-LABEL: load_v8f64_v8i16:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmovsxwd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
|
||||
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
|
||||
@ -479,7 +479,7 @@ define <8 x double> @load_v8f64_v8i16(<8 x i16> %trigger, <8 x double>* %addr, <
|
||||
;
|
||||
; AVX2-LABEL: load_v8f64_v8i16:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
|
||||
; AVX2-NEXT: vpmovsxwd %xmm3, %xmm3
|
||||
@ -1778,12 +1778,12 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
|
||||
;
|
||||
; AVX1-LABEL: load_v8i64_v8i16:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmovsxwd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
|
||||
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
|
||||
@ -1799,7 +1799,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
|
||||
;
|
||||
; AVX2-LABEL: load_v8i64_v8i16:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
|
||||
; AVX2-NEXT: vpmovsxwd %xmm3, %xmm3
|
||||
|
@ -838,7 +838,7 @@ define void @store_v2i64_v2i64(<2 x i64> %trigger, <2 x i64>* %addr, <2 x i64> %
|
||||
; SSE2-NEXT: testb $2, %al
|
||||
; SSE2-NEXT: je LBB7_4
|
||||
; SSE2-NEXT: LBB7_3: ## %cond.store1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -922,7 +922,7 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, <4 x i64>* %addr, <4 x i64> %
|
||||
; SSE2-NEXT: testb $2, %al
|
||||
; SSE2-NEXT: je LBB8_4
|
||||
; SSE2-NEXT: LBB8_3: ## %cond.store1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je LBB8_6
|
||||
@ -931,7 +931,7 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, <4 x i64>* %addr, <4 x i64> %
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je LBB8_8
|
||||
; SSE2-NEXT: LBB8_7: ## %cond.store5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, 24(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -1158,7 +1158,7 @@ define void @store_v4i32_v4i32(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je LBB11_6
|
||||
; SSE2-NEXT: LBB11_5: ## %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je LBB11_8
|
||||
@ -1280,7 +1280,7 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je LBB12_6
|
||||
; SSE2-NEXT: LBB12_5: ## %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je LBB12_8
|
||||
@ -1299,7 +1299,7 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %
|
||||
; SSE2-NEXT: testb $64, %al
|
||||
; SSE2-NEXT: je LBB12_14
|
||||
; SSE2-NEXT: LBB12_13: ## %cond.store11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 24(%rdi)
|
||||
; SSE2-NEXT: testb $-128, %al
|
||||
; SSE2-NEXT: je LBB12_16
|
||||
@ -4674,7 +4674,7 @@ define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, <4 x doub
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: vmaskmovpd %ymm0, %ymm1, (%rdi)
|
||||
@ -4853,7 +4853,7 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
|
||||
; SSE2-NEXT: testb $4, %cl
|
||||
; SSE2-NEXT: je LBB25_6
|
||||
; SSE2-NEXT: LBB25_5: ## %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -60,7 +60,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB0_6
|
||||
; SSE2-NEXT: .LBB0_5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: jne .LBB0_7
|
||||
@ -75,7 +75,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $64, %al
|
||||
; SSE2-NEXT: je .LBB0_14
|
||||
; SSE2-NEXT: .LBB0_13: # %cond.store11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 24(%rdi)
|
||||
; SSE2-NEXT: testb $-128, %al
|
||||
; SSE2-NEXT: je .LBB0_16
|
||||
@ -1030,7 +1030,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB3_6
|
||||
; SSE2-NEXT: .LBB3_5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je .LBB3_8
|
||||
|
@ -141,7 +141,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB0_6
|
||||
; SSE2-NEXT: # %bb.5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm4, 8(%rdi)
|
||||
; SSE2-NEXT: .LBB0_6: # %else4
|
||||
; SSE2-NEXT: por %xmm3, %xmm2
|
||||
@ -176,7 +176,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $64, %al
|
||||
; SSE2-NEXT: je .LBB0_14
|
||||
; SSE2-NEXT: .LBB0_13: # %cond.store11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm13[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm13[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 24(%rdi)
|
||||
; SSE2-NEXT: testb $-128, %al
|
||||
; SSE2-NEXT: je .LBB0_16
|
||||
@ -1579,7 +1579,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB3_6
|
||||
; SSE2-NEXT: .LBB3_5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je .LBB3_8
|
||||
|
@ -87,7 +87,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB0_6
|
||||
; SSE2-NEXT: # %bb.5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm13[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm13[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: .LBB0_6: # %else4
|
||||
; SSE2-NEXT: por %xmm0, %xmm3
|
||||
@ -122,7 +122,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: testb $64, %al
|
||||
; SSE2-NEXT: je .LBB0_14
|
||||
; SSE2-NEXT: .LBB0_13: # %cond.store11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 24(%rdi)
|
||||
; SSE2-NEXT: testb $-128, %al
|
||||
; SSE2-NEXT: je .LBB0_16
|
||||
@ -1351,7 +1351,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask
|
||||
; SSE2-NEXT: testb $4, %al
|
||||
; SSE2-NEXT: je .LBB3_6
|
||||
; SSE2-NEXT: .LBB3_5: # %cond.store3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: testb $8, %al
|
||||
; SSE2-NEXT: je .LBB3_8
|
||||
|
@ -309,7 +309,7 @@ define void @merge_2_v4f32_align1_ntstore(<4 x float>* %a0, <4 x float>* %a1) no
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm2, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 12(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm2, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 8(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
@ -320,7 +320,7 @@ define void @merge_2_v4f32_align1_ntstore(<4 x float>* %a0, <4 x float>* %a1) no
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm0, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 28(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm0, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 24(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
@ -348,12 +348,12 @@ define void @merge_2_v4f32_align1_ntstore(<4 x float>* %a0, <4 x float>* %a1) no
|
||||
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, (%rsi)
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 8(%rsi)
|
||||
; X64-SSE2-NEXT: movq %xmm1, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 16(%rsi)
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 24(%rsi)
|
||||
; X64-SSE2-NEXT: retq
|
||||
@ -422,7 +422,7 @@ define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1) nounwind {
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm2, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 12(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm2, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 8(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
@ -433,7 +433,7 @@ define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1) nounwind {
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm0, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 28(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X86-SSE2-NEXT: movd %xmm0, %ecx
|
||||
; X86-SSE2-NEXT: movntil %ecx, 24(%eax)
|
||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
@ -461,12 +461,12 @@ define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1) nounwind {
|
||||
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, (%rsi)
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 8(%rsi)
|
||||
; X64-SSE2-NEXT: movq %xmm1, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 16(%rsi)
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax
|
||||
; X64-SSE2-NEXT: movntiq %rax, 24(%rsi)
|
||||
; X64-SSE2-NEXT: retq
|
||||
|
@ -197,7 +197,7 @@ define i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -263,7 +263,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -327,7 +327,7 @@ define i32 @sad_16i8_256() "min-legal-vector-width"="256" {
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -388,7 +388,7 @@ define i32 @sad_16i8_512() "min-legal-vector-width"="512" {
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -955,10 +955,10 @@ define void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal
|
||||
; CHECK-LABEL: zext_v16i8_v16i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
@ -977,10 +977,10 @@ define void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal
|
||||
; CHECK-LABEL: sext_v16i8_v16i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovsxwq %xmm1, %ymm1
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovsxwq %xmm3, %ymm3
|
||||
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
|
||||
; CHECK-NEXT: vpmovsxwq %xmm2, %ymm2
|
||||
|
@ -595,14 +595,14 @@ define void @test_extract_f64(<2 x double> %arg, double* %dst) {
|
||||
define void @test_extract_i64(<2 x i64> %arg, i64* %dst) {
|
||||
; SSE2-LABEL: test_extract_i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: movntiq %rax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4A-LABEL: test_extract_i64:
|
||||
; SSE4A: # %bb.0:
|
||||
; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE4A-NEXT: movq %xmm0, %rax
|
||||
; SSE4A-NEXT: movntiq %rax, (%rdi)
|
||||
; SSE4A-NEXT: retq
|
||||
|
@ -9,7 +9,7 @@
|
||||
define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) nounwind {
|
||||
; SSE2-LABEL: v3i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movq %xmm2, 16(%rdi)
|
||||
; SSE2-NEXT: movdqa %xmm0, (%rdi)
|
||||
@ -285,7 +285,7 @@ define void @v7i16(<4 x i16> %a, <4 x i16> %b, <7 x i16>* %p) nounwind {
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,6,4,7]
|
||||
; SSE2-NEXT: movw %ax, 12(%rdi)
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -391,7 +391,7 @@ define void @v12i8(<8 x i8> %a, <8 x i8> %b, <12 x i8>* %p) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm1, %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: movq %xmm2, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -539,7 +539,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,2,2]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm2[2,3],xmm4[4,5,6,7]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm0[4,5],xmm4[6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
|
||||
@ -637,7 +637,7 @@ define void @pr29025(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <12 x i8> *%p) nounw
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE2-NEXT: por %xmm0, %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -1202,7 +1202,7 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
|
||||
; SSE2-NEXT: movups 32(%rdi), %xmm10
|
||||
; SSE2-NEXT: movups 48(%rdi), %xmm12
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm6[0,0]
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm6[3,3]
|
||||
@ -1215,7 +1215,7 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm4[3,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,2,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm8[1,0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm12[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm12[2,3,2,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm12 = xmm12[0,3],xmm4[0,2]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,1],xmm8[0,3]
|
||||
@ -1243,12 +1243,12 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
|
||||
; SSE42-NEXT: movdqu 48(%rdi), %xmm5
|
||||
; SSE42-NEXT: movdqa %xmm2, %xmm6
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0,1],xmm4[2,3],xmm6[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
|
||||
; SSE42-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm2[2,3]
|
||||
; SSE42-NEXT: insertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm10[1]
|
||||
; SSE42-NEXT: movdqa %xmm9, %xmm1
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm5[2,3],xmm1[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
|
||||
; SSE42-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm9[2,3]
|
||||
; SSE42-NEXT: insertps {{.*#+}} xmm5 = xmm5[0,1,2],xmm8[1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm10[0,1,2,2]
|
||||
@ -1476,7 +1476,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm1[2,3],xmm7[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,1,0,1]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm7[0,1,2,3],xmm1[4,5],xmm7[6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3],xmm0[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,2,3,3]
|
||||
@ -1489,7 +1489,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2,3],xmm4[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm6[0,1,0,1]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5],xmm4[6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm8[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm8[2,3,2,3]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
|
||||
; SSE42-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0,1],xmm4[2,3],xmm6[4,5,6,7]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,2,3,3]
|
||||
|
@ -46,7 +46,7 @@ define i32 @extract_extract01_v4i32_add_i32(<4 x i32> %x) {
|
||||
define i32 @extract_extract23_v4i32_add_i32(<4 x i32> %x) {
|
||||
; SSE3-SLOW-LABEL: extract_extract23_v4i32_add_i32:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: movd %xmm1, %ecx
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-SLOW-NEXT: movd %xmm0, %eax
|
||||
@ -114,7 +114,7 @@ define i32 @extract_extract01_v4i32_add_i32_commute(<4 x i32> %x) {
|
||||
define i32 @extract_extract23_v4i32_add_i32_commute(<4 x i32> %x) {
|
||||
; SSE3-SLOW-LABEL: extract_extract23_v4i32_add_i32_commute:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: movd %xmm1, %ecx
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE3-SLOW-NEXT: movd %xmm0, %eax
|
||||
@ -326,7 +326,7 @@ define i32 @extract_extract01_v4i32_sub_i32(<4 x i32> %x) {
|
||||
define i32 @extract_extract23_v4i32_sub_i32(<4 x i32> %x) {
|
||||
; SSE3-SLOW-LABEL: extract_extract23_v4i32_sub_i32:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm1, %eax
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %ecx
@ -382,7 +382,7 @@ define i32 @extract_extract01_v4i32_sub_i32_commute(<4 x i32> %x) {
define i32 @extract_extract23_v4i32_sub_i32_commute(<4 x i32> %x) {
; SSE3-LABEL: extract_extract23_v4i32_sub_i32_commute:
; SSE3: # %bb.0:
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-NEXT: movd %xmm1, %ecx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %eax
@ -557,7 +557,7 @@ define i32 @extract_extract01_v8i32_add_i32(<8 x i32> %x) {
define i32 @extract_extract23_v8i32_add_i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract23_v8i32_add_i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm1, %ecx
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %eax
@ -594,7 +594,7 @@ define i32 @extract_extract23_v8i32_add_i32(<8 x i32> %x) {
define i32 @extract_extract67_v8i32_add_i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract67_v8i32_add_i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %ecx
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %eax
@ -684,7 +684,7 @@ define i32 @extract_extract01_v8i32_add_i32_commute(<8 x i32> %x) {
define i32 @extract_extract23_v8i32_add_i32_commute(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract23_v8i32_add_i32_commute:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm1, %ecx
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %eax
@ -721,7 +721,7 @@ define i32 @extract_extract23_v8i32_add_i32_commute(<8 x i32> %x) {
define i32 @extract_extract67_v8i32_add_i32_commute(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract67_v8i32_add_i32_commute:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %ecx
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %eax
@ -1119,7 +1119,7 @@ define i32 @extract_extract01_v8i32_sub_i32(<8 x i32> %x) {
define i32 @extract_extract23_v8i32_sub_i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract23_v8i32_sub_i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm1, %eax
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %ecx
@ -1156,7 +1156,7 @@ define i32 @extract_extract23_v8i32_sub_i32(<8 x i32> %x) {
define i32 @extract_extract67_v8i32_sub_i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: extract_extract67_v8i32_sub_i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %eax
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-SLOW-NEXT: movd %xmm0, %ecx
@ -1672,7 +1672,7 @@ define i32 @extract_extract01_v4i32_add_i32_uses3(<4 x i32> %x, i32* %p1, i32* %
define i32 @partial_reduction_add_v8i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: partial_reduction_add_v8i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-SLOW-NEXT: paddd %xmm1, %xmm0
@ -1681,7 +1681,7 @@ define i32 @partial_reduction_add_v8i32(<8 x i32> %x) {
;
; SSE3-FAST-LABEL: partial_reduction_add_v8i32:
; SSE3-FAST: # %bb.0:
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
; SSE3-FAST-NEXT: movd %xmm1, %eax
@ -1689,7 +1689,7 @@ define i32 @partial_reduction_add_v8i32(<8 x i32> %x) {
;
; AVX-SLOW-LABEL: partial_reduction_add_v8i32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -1715,7 +1715,7 @@ define i32 @partial_reduction_add_v8i32(<8 x i32> %x) {
define i32 @partial_reduction_add_v16i32(<16 x i32> %x) {
; SSE3-SLOW-LABEL: partial_reduction_add_v16i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-SLOW-NEXT: paddd %xmm1, %xmm0
@ -1724,7 +1724,7 @@ define i32 @partial_reduction_add_v16i32(<16 x i32> %x) {
;
; SSE3-FAST-LABEL: partial_reduction_add_v16i32:
; SSE3-FAST: # %bb.0:
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
; SSE3-FAST-NEXT: movd %xmm1, %eax
@ -1732,7 +1732,7 @@ define i32 @partial_reduction_add_v16i32(<16 x i32> %x) {
;
; AVX-SLOW-LABEL: partial_reduction_add_v16i32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@ -1758,7 +1758,7 @@ define i32 @partial_reduction_add_v16i32(<16 x i32> %x) {
define i32 @partial_reduction_sub_v8i32(<8 x i32> %x) {
; SSE3-SLOW-LABEL: partial_reduction_sub_v8i32:
; SSE3-SLOW: # %bb.0:
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-SLOW-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: psubd %xmm1, %xmm0
|
||||
@ -1767,7 +1767,7 @@ define i32 @partial_reduction_sub_v8i32(<8 x i32> %x) {
|
||||
;
|
||||
; SSE3-FAST-LABEL: partial_reduction_sub_v8i32:
|
||||
; SSE3-FAST: # %bb.0:
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-FAST-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
|
||||
; SSE3-FAST-NEXT: movd %xmm0, %eax
|
||||
@ -1775,7 +1775,7 @@ define i32 @partial_reduction_sub_v8i32(<8 x i32> %x) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: partial_reduction_sub_v8i32:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
@ -1785,7 +1785,7 @@ define i32 @partial_reduction_sub_v8i32(<8 x i32> %x) {
|
||||
;
|
||||
; AVX-FAST-LABEL: partial_reduction_sub_v8i32:
|
||||
; AVX-FAST: # %bb.0:
|
||||
; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
|
||||
; AVX-FAST-NEXT: vmovd %xmm0, %eax
|
||||
@ -1802,7 +1802,7 @@ define i32 @partial_reduction_sub_v8i32(<8 x i32> %x) {
|
||||
define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
; SSE3-SLOW-LABEL: partial_reduction_sub_v16i32:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: psubd %xmm1, %xmm0
|
||||
@ -1811,7 +1811,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
;
|
||||
; SSE3-FAST-LABEL: partial_reduction_sub_v16i32:
|
||||
; SSE3-FAST: # %bb.0:
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-FAST-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
|
||||
; SSE3-FAST-NEXT: movd %xmm0, %eax
|
||||
@ -1819,7 +1819,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: partial_reduction_sub_v16i32:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
@ -1829,7 +1829,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
;
|
||||
; AVX1-FAST-LABEL: partial_reduction_sub_v16i32:
|
||||
; AVX1-FAST: # %bb.0:
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vmovd %xmm0, %eax
|
||||
@ -1838,7 +1838,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
;
|
||||
; AVX2-FAST-LABEL: partial_reduction_sub_v16i32:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
@ -1848,7 +1848,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
;
|
||||
; AVX512-FAST-LABEL: partial_reduction_sub_v16i32:
|
||||
; AVX512-FAST: # %bb.0:
|
||||
; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
@ -1868,7 +1868,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
|
||||
define i16 @hadd16_8(<8 x i16> %x223) {
|
||||
; SSE3-SLOW-LABEL: hadd16_8:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: paddw %xmm0, %xmm1
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: paddw %xmm1, %xmm0
|
||||
@ -1890,7 +1890,7 @@ define i16 @hadd16_8(<8 x i16> %x223) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: hadd16_8:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -1921,7 +1921,7 @@ define i16 @hadd16_8(<8 x i16> %x223) {
|
||||
define i32 @hadd32_4(<4 x i32> %x225) {
|
||||
; SSE3-SLOW-LABEL: hadd32_4:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm1, %xmm0
|
||||
@ -1937,7 +1937,7 @@ define i32 @hadd32_4(<4 x i32> %x225) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: hadd32_4:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1961,7 +1961,7 @@ define i32 @hadd32_4(<4 x i32> %x225) {
|
||||
define i32 @hadd32_8(<8 x i32> %x225) {
|
||||
; SSE3-SLOW-LABEL: hadd32_8:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm1, %xmm0
|
||||
@ -1970,7 +1970,7 @@ define i32 @hadd32_8(<8 x i32> %x225) {
|
||||
;
|
||||
; SSE3-FAST-LABEL: hadd32_8:
|
||||
; SSE3-FAST: # %bb.0:
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
|
||||
; SSE3-FAST-NEXT: movd %xmm1, %eax
|
||||
@ -1978,7 +1978,7 @@ define i32 @hadd32_8(<8 x i32> %x225) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: hadd32_8:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2004,7 +2004,7 @@ define i32 @hadd32_8(<8 x i32> %x225) {
|
||||
define i32 @hadd32_16(<16 x i32> %x225) {
|
||||
; SSE3-SLOW-LABEL: hadd32_16:
|
||||
; SSE3-SLOW: # %bb.0:
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE3-SLOW-NEXT: paddd %xmm1, %xmm0
|
||||
@ -2013,7 +2013,7 @@ define i32 @hadd32_16(<16 x i32> %x225) {
|
||||
;
|
||||
; SSE3-FAST-LABEL: hadd32_16:
|
||||
; SSE3-FAST: # %bb.0:
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
|
||||
; SSE3-FAST-NEXT: movd %xmm1, %eax
|
||||
@ -2021,7 +2021,7 @@ define i32 @hadd32_16(<16 x i32> %x225) {
|
||||
;
|
||||
; AVX-SLOW-LABEL: hadd32_16:
|
||||
; AVX-SLOW: # %bb.0:
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -2119,7 +2119,7 @@ define i32 @hadd32_4_pgso(<4 x i32> %x225) !prof !14 {
|
||||
define i32 @hadd32_8_optsize(<8 x i32> %x225) optsize {
|
||||
; SSE3-LABEL: hadd32_8_optsize:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-NEXT: phaddd %xmm1, %xmm1
|
||||
; SSE3-NEXT: movd %xmm1, %eax
|
||||
@ -2143,7 +2143,7 @@ define i32 @hadd32_8_optsize(<8 x i32> %x225) optsize {
|
||||
define i32 @hadd32_16_optsize(<16 x i32> %x225) optsize {
|
||||
; SSE3-LABEL: hadd32_16_optsize:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE3-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE3-NEXT: phaddd %xmm1, %xmm1
|
||||
; SSE3-NEXT: movd %xmm1, %eax
@ -1083,7 +1083,7 @@ define <4 x i32> @mul_v4i64_zero_upper_left(<4 x i32> %val1, <4 x i64> %val2) {
|
||||
;
|
||||
; SSE41-LABEL: mul_v4i64_zero_upper_left:
|
||||
; SSE41: # %bb.0: # %entry
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
@ -1252,14 +1252,14 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm11
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm15[2],xmm11[3],xmm15[3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm15[0],xmm0[1],xmm15[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: pxor %xmm14, %xmm14
|
||||
; SSE2-NEXT: pcmpgtd %xmm3, %xmm14
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1]
|
||||
; SSE2-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: pxor %xmm6, %xmm6
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1]
|
||||
@ -1306,7 +1306,7 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm3, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm3, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm3, %xmm6
|
||||
@ -1324,7 +1324,7 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
|
||||
;
|
||||
; AVX2-LABEL: mul_v8i64_sext:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxwq %xmm2, %ymm2
|
||||
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
@ -434,7 +434,7 @@ define <8 x i32> @mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmulhuw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -467,7 +467,7 @@ define <8 x i32> @mulhsw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmulhw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -498,7 +498,7 @@ define <8 x i32> @mulhsw_v8i16_ashr(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmulhw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -538,9 +538,9 @@ define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmulhuw %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm1
|
||||
@ -589,9 +589,9 @@ define <16 x i32> @mulhsw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmulhw %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm1
|
||||
@ -639,9 +639,9 @@ define <16 x i32> @mulhsw_v16i16_ashr(<16 x i16> %a, <16 x i16> %b) {
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pmulhw %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmovsxwd %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm1
|
||||
@ -702,16 +702,16 @@ define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhuw %xmm4, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
|
||||
; SSE41-NEXT: pmulhuw %xmm5, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
|
||||
; SSE41-NEXT: pmulhuw %xmm6, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero
|
||||
; SSE41-NEXT: pmulhuw %xmm7, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -800,16 +800,16 @@ define <32 x i32> @mulhsw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhw %xmm4, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
|
||||
; SSE41-NEXT: pmulhw %xmm5, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
|
||||
; SSE41-NEXT: pmulhw %xmm6, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero
|
||||
; SSE41-NEXT: pmulhw %xmm7, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -901,16 +901,16 @@ define <32 x i32> @mulhsw_v32i16_ashr(<32 x i16> %a, <32 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhw %xmm4, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmulhw %xmm5, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm5, %xmm5
|
||||
; SSE41-NEXT: pmulhw %xmm6, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm6, %xmm6
|
||||
; SSE41-NEXT: pmulhw %xmm7, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm7, %xmm7
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
|
||||
@ -1026,28 +1026,28 @@ define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm8 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm9 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm10 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm11 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm12 = xmm12[0],zero,xmm12[1],zero,xmm12[2],zero,xmm12[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm13 = xmm13[0],zero,xmm13[1],zero,xmm13[2],zero,xmm13[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm6
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm14 = xmm14[0],zero,xmm14[1],zero,xmm14[2],zero,xmm14[3],zero
|
||||
; SSE41-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm7
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm15 = xmm15[0],zero,xmm15[1],zero,xmm15[2],zero,xmm15[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -1204,28 +1204,28 @@ define <64 x i32> @mulhsw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm8 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm9 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm10 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm11 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm12 = xmm12[0],zero,xmm12[1],zero,xmm12[2],zero,xmm12[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm13 = xmm13[0],zero,xmm13[1],zero,xmm13[2],zero,xmm13[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm14 = xmm14[0],zero,xmm14[1],zero,xmm14[2],zero,xmm14[3],zero
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm15 = xmm15[0],zero,xmm15[1],zero,xmm15[2],zero,xmm15[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -1386,28 +1386,28 @@ define <64 x i32> @mulhsw_v64i16_ashr(<64 x i16> %a, <64 x i16> %b) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm8, %xmm8
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm9, %xmm9
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm10, %xmm10
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm11, %xmm11
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm12, %xmm12
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm13, %xmm13
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm14, %xmm14
|
||||
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm15, %xmm15
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
|
||||
@ -1541,7 +1541,7 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
@ -1552,7 +1552,7 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -1648,7 +1648,7 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
@ -1659,7 +1659,7 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -1775,7 +1775,7 @@ define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE41-NEXT: pmovsxwq %xmm0, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm2, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE41-NEXT: pmovsxwq %xmm0, %xmm3
|
||||
@ -1786,7 +1786,7 @@ define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
@ -75,7 +75,7 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
|
||||
; CHECK-NEXT: negl %eax
|
||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
@ -21,7 +21,7 @@ define void @test55() {
|
||||
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; CHECK-NEXT: # implicit-def: $ymm2
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm2
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; CHECK-NEXT: vmovdqa %ymm0, (%rsp)
@ -8,7 +8,7 @@ define void @foo(i1 %c, <2 x i64> %x) {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: # kill: def $dil killed $dil killed $edi
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rcx
|
||||
; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
|
||||
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@ -7,7 +7,7 @@ define <4 x double> @autogen_SD30452(i1 %L230) {
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm2, %rax
|
||||
; CHECK-NEXT: xorps %xmm2, %xmm2
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
@ -57,7 +57,7 @@ define <3 x i32> @f_29(<12 x i16> %a, <12 x i16> %b) {
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm2[0,1]
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,2,3]
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
|
||||
; CHECK-NEXT: paddd %xmm3, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,1,3]
@ -77,7 +77,7 @@ define i1 @parseHeaders2_scalar_and(i64 * %ptr) nounwind {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqu (%rdi), %xmm0
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: testq %rcx, %rax
|
||||
; SSE2-NEXT: sete %al
@ -21,7 +21,7 @@ define { i64, i64 } @PR46189(double %0, double %1, double %2, double %3, double
|
||||
; SSE-NEXT: cvttpd2dq %xmm3, %xmm1
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: movq %xmm0, %rax
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: movq %xmm0, %rdx
|
||||
; SSE-NEXT: retq
|
||||
;
@ -10,7 +10,7 @@ define void @EntryModule(i8** %buffer_table) {
|
||||
; CHECK-NEXT: vcmpneqps (%rax), %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpsrld $31, %xmm0, %xmm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1
|
||||
; CHECK-NEXT: vpsubd %xmm0, %xmm2, %xmm0
@ -135,7 +135,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
|
||||
; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
|
||||
; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2
|
||||
; AVX256VL-NEXT: vpmovsxbd %xmm0, %ymm0
@ -522,7 +522,7 @@ define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: test13:
|
||||
; SSE41: # %bb.0: # %vector.ph
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm3
|
||||
@ -697,7 +697,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[3,1,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
|
||||
@ -735,9 +735,9 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
|
||||
; AVX1-NEXT: vpmaxud %xmm6, %xmm7, %xmm6
|
||||
@ -772,7 +772,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: test14:
|
||||
; AVX2: # %bb.0: # %vector.ph
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-NEXT: vpmaxud %ymm4, %ymm1, %ymm4
|
||||
@ -873,7 +873,7 @@ define <8 x i16> @test15(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: test15:
|
||||
; SSE41: # %bb.0: # %vector.ph
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm5
|
||||
@ -1005,7 +1005,7 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: test16:
|
||||
; SSE41: # %bb.0: # %vector.ph
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmaxud %xmm1, %xmm4
|
||||
@ -1871,10 +1871,10 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: psubus_16i32_max:
|
||||
; SSE41: # %bb.0: # %vector.ph
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmaxud %xmm2, %xmm0
@ -27,7 +27,7 @@ define i32 @sad_16i8() nounwind {
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -54,7 +54,7 @@ define i32 @sad_16i8() nounwind {
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -79,7 +79,7 @@ define i32 @sad_16i8() nounwind {
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -104,7 +104,7 @@ define i32 @sad_16i8() nounwind {
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -172,7 +172,7 @@ define i32 @sad_32i8() nounwind {
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -207,7 +207,7 @@ define i32 @sad_32i8() nounwind {
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -234,7 +234,7 @@ define i32 @sad_32i8() nounwind {
|
||||
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -261,7 +261,7 @@ define i32 @sad_32i8() nounwind {
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -346,7 +346,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -397,7 +397,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm6, %xmm2
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -431,7 +431,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -463,7 +463,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -492,7 +492,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -628,7 +628,7 @@ define i32 @sad_4i8() nounwind {
|
||||
; SSE2-NEXT: addq $4, %rax
|
||||
; SSE2-NEXT: jne .LBB4_1
|
||||
; SSE2-NEXT: # %bb.2: # %middle.block
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm0
|
||||
@ -649,7 +649,7 @@ define i32 @sad_4i8() nounwind {
|
||||
; AVX-NEXT: addq $4, %rax
|
||||
; AVX-NEXT: jne .LBB4_1
|
||||
; AVX-NEXT: # %bb.2: # %middle.block
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -760,7 +760,7 @@ define i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <16 x i8>* n
|
||||
; SSE2-NEXT: movdqu (%rdi), %xmm0
|
||||
; SSE2-NEXT: movdqu (%rdx), %xmm1
|
||||
; SSE2-NEXT: psadbw %xmm0, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: paddq %xmm1, %xmm0
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: retq
|
||||
@ -769,7 +769,7 @@ define i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <16 x i8>* n
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovdqu (%rdi), %xmm0
|
||||
; AVX-NEXT: vpsadbw (%rdx), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
@ -803,7 +803,7 @@ define i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* n
|
||||
; SSE2-NEXT: movdqu 16(%rdi), %xmm0
|
||||
; SSE2-NEXT: psadbw %xmm1, %xmm0
|
||||
; SSE2-NEXT: paddq %xmm2, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: paddq %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: retq
|
||||
@ -815,7 +815,7 @@ define i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* n
|
||||
; AVX1-NEXT: vpsadbw 16(%rdx), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsadbw (%rdx), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: retq
|
||||
@ -826,7 +826,7 @@ define i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* n
|
||||
; AVX2-NEXT: vpsadbw (%rdx), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -838,7 +838,7 @@ define i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* n
|
||||
; AVX512-NEXT: vpsadbw (%rdx), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -883,7 +883,7 @@ define i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* n
|
||||
; SSE2-NEXT: paddq %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddq %xmm1, %xmm2
|
||||
; SSE2-NEXT: paddq %xmm4, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: paddq %xmm2, %xmm0
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: retq
|
||||
@ -901,7 +901,7 @@ define i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* n
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpsadbw (%rdx), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: retq
|
||||
@ -915,7 +915,7 @@ define i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* n
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -931,7 +931,7 @@ define i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* n
|
||||
; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
@ -945,7 +945,7 @@ define i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* n
|
||||
; AVX512BW-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
@ -987,7 +987,7 @@ define i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x
|
||||
; SSE2-NEXT: psadbw %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm2
|
||||
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -1002,7 +1002,7 @@ define i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x
|
||||
; AVX-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -1051,7 +1051,7 @@ define i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %
|
||||
; SSE2-NEXT: movdqu (%rcx), %xmm2
|
||||
; SSE2-NEXT: psadbw %xmm0, %xmm2
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
@ -1065,7 +1065,7 @@ define i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %
|
||||
; AVX-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; AVX-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
|
@ -443,13 +443,13 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pxor %xmm2, %xmm2
|
||||
; X64-NEXT: pcmpgtd %xmm1, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movdqa %xmm1, %xmm4
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; X64-NEXT: movq %xmm4, %rcx
|
||||
; X64-NEXT: pxor %xmm2, %xmm2
|
||||
; X64-NEXT: pcmpgtd %xmm0, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; X64-NEXT: psllq $31, %xmm0
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
@ -457,9 +457,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: idivq %rcx
|
||||
; X64-NEXT: movq %rax, %r8
|
||||
; X64-NEXT: movq %rdx, %r11
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rax
|
||||
; X64-NEXT: cqto
|
||||
; X64-NEXT: idivq %rcx
|
||||
@ -478,9 +478,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: idivq %rdi
|
||||
; X64-NEXT: movq %rax, %r9
|
||||
; X64-NEXT: movq %rdx, %rdi
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rsi
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rax
|
||||
; X64-NEXT: cqto
|
||||
; X64-NEXT: idivq %rsi
|
||||
|
@ -651,14 +651,14 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: cmoveq %rax, %r13
|
||||
; X64-NEXT: movq %r13, %xmm0
|
||||
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rbx
|
||||
; X64-NEXT: movq %rbx, %r13
|
||||
; X64-NEXT: sarq $63, %r13
|
||||
; X64-NEXT: shldq $31, %rbx, %r13
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rdx
|
||||
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; X64-NEXT: movq %rdx, %rbp
|
||||
@ -709,8 +709,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; X64-NEXT: psrlq $1, %xmm1
|
||||
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm1 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm1 = mem[2,3,2,3]
|
||||
; X64-NEXT: pxor %xmm0, %xmm0
|
||||
; X64-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
@ -720,8 +720,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: movq %rbx, %r12
|
||||
; X64-NEXT: sarq $63, %r12
|
||||
; X64-NEXT: shldq $31, %rbx, %r12
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm1 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm1 = mem[2,3,2,3]
|
||||
; X64-NEXT: pxor %xmm0, %xmm0
|
||||
; X64-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
@ -773,14 +773,14 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: cmoveq %rax, %r13
|
||||
; X64-NEXT: movq %r13, %xmm0
|
||||
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rbx
|
||||
; X64-NEXT: movq %rbx, %r13
|
||||
; X64-NEXT: sarq $63, %r13
|
||||
; X64-NEXT: shldq $31, %rbx, %r13
|
||||
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,0,1]
|
||||
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; X64-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rdx
|
||||
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; X64-NEXT: movq %rdx, %rbp
|
||||
|
@ -75,16 +75,16 @@ define i32 @eq_i128(<2 x i64> %x, <2 x i64> %y) {
|
||||
define i32 @ne_i256(<4 x i64> %x, <4 x i64> %y) {
|
||||
; SSE2-LABEL: ne_i256:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm4, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm4, %rcx
|
||||
; SSE2-NEXT: movq %xmm0, %rdx
|
||||
; SSE2-NEXT: movq %xmm1, %r8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdi
|
||||
; SSE2-NEXT: xorq %rax, %rdi
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rsi
|
||||
; SSE2-NEXT: xorq %rcx, %rsi
|
||||
; SSE2-NEXT: orq %rdi, %rsi
|
||||
@ -155,16 +155,16 @@ define i32 @ne_i256(<4 x i64> %x, <4 x i64> %y) {
|
||||
define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {
|
||||
; SSE2-LABEL: eq_i256:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm4, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm4, %rcx
|
||||
; SSE2-NEXT: movq %xmm0, %rdx
|
||||
; SSE2-NEXT: movq %xmm1, %r8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdi
|
||||
; SSE2-NEXT: xorq %rax, %rdi
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rsi
|
||||
; SSE2-NEXT: xorq %rcx, %rsi
|
||||
; SSE2-NEXT: orq %rdi, %rsi
|
||||
@ -235,28 +235,28 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {
|
||||
define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
|
||||
; SSE2-LABEL: ne_i512:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rcx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rdx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rsi
|
||||
; SSE2-NEXT: movq %xmm0, %r11
|
||||
; SSE2-NEXT: movq %xmm2, %r8
|
||||
; SSE2-NEXT: movq %xmm1, %r9
|
||||
; SSE2-NEXT: movq %xmm3, %r10
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdi
|
||||
; SSE2-NEXT: xorq %rax, %rdi
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorq %rcx, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: xorq %rdx, %rcx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdx
|
||||
; SSE2-NEXT: xorq %rsi, %rdx
|
||||
; SSE2-NEXT: orq %rcx, %rdx
|
||||
@ -426,28 +426,28 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
|
||||
define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
|
||||
; SSE2-LABEL: eq_i512:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rcx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rdx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm8, %rsi
|
||||
; SSE2-NEXT: movq %xmm0, %r11
|
||||
; SSE2-NEXT: movq %xmm2, %r8
|
||||
; SSE2-NEXT: movq %xmm1, %r9
|
||||
; SSE2-NEXT: movq %xmm3, %r10
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdi
|
||||
; SSE2-NEXT: xorq %rax, %rdi
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorq %rcx, %rax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: xorq %rdx, %rcx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rdx
|
||||
; SSE2-NEXT: xorq %rsi, %rdx
|
||||
; SSE2-NEXT: orq %rcx, %rdx
|
||||
|
@ -2094,9 +2094,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X86-SSE-NEXT: xorl %edx, %edx
|
||||
; X86-SSE-NEXT: divl %esi
|
||||
; X86-SSE-NEXT: movd %edx, %xmm3
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
|
||||
; X86-SSE-NEXT: movd %xmm7, %eax
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,0,1]
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
|
||||
; X86-SSE-NEXT: movd %xmm7, %esi
|
||||
; X86-SSE-NEXT: xorl %edx, %edx
|
||||
; X86-SSE-NEXT: divl %esi
|
||||
@ -2137,9 +2137,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X86-SSE-NEXT: xorl %edx, %edx
|
||||
; X86-SSE-NEXT: divl %esi
|
||||
; X86-SSE-NEXT: movd %edx, %xmm4
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; X86-SSE-NEXT: movd %xmm2, %eax
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X86-SSE-NEXT: movd %xmm1, %esi
|
||||
; X86-SSE-NEXT: xorl %edx, %edx
|
||||
; X86-SSE-NEXT: divl %esi
|
||||
@ -2336,9 +2336,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm3
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm7, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm7, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
@ -2379,9 +2379,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm4
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm2, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm1, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
|
@ -224,7 +224,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SLM32: # %bb.0:
|
||||
; SLM32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
; SLM32-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SLM32-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLM32-NEXT: movdqa %xmm1, %xmm4
|
||||
; SLM32-NEXT: movdqa %xmm3, %xmm5
|
||||
@ -244,7 +244,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SLM64: # %bb.0:
|
||||
; SLM64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
; SLM64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SLM64-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLM64-NEXT: movdqa %xmm1, %xmm4
|
||||
; SLM64-NEXT: movdqa %xmm3, %xmm5
|
||||
@ -270,7 +270,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SLOW32-NEXT: movdqa %xmm1, %xmm4
|
||||
; SLOW32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
|
||||
; SLOW32-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SLOW32-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLOW32-NEXT: movdqa %xmm3, %xmm0
|
||||
; SLOW32-NEXT: pmulhw %xmm2, %xmm0
|
||||
@ -291,7 +291,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SLOW64-NEXT: movdqa %xmm1, %xmm4
|
||||
; SLOW64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
|
||||
; SLOW64-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SLOW64-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SLOW64-NEXT: movdqa %xmm3, %xmm0
|
||||
; SLOW64-NEXT: pmulhw %xmm2, %xmm0
|
||||
@ -306,7 +306,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -322,7 +322,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -336,7 +336,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
;
|
||||
; AVX2-32-LABEL: test_mul_v16i32_v16i8:
|
||||
; AVX2-32: # %bb.0:
|
||||
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
||||
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
@ -346,7 +346,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
|
||||
;
|
||||
; AVX2-64-LABEL: test_mul_v16i32_v16i8:
|
||||
; AVX2-64: # %bb.0:
|
||||
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
||||
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
@ -494,7 +494,7 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
|
||||
;
|
||||
; SSE4-32-LABEL: test_mul_v8i32_v8i16:
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE4-32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -504,7 +504,7 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
|
||||
;
|
||||
; SSE4-64-LABEL: test_mul_v8i32_v8i16:
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE4-64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -609,9 +609,9 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
|
||||
;
|
||||
; SSE4-32-LABEL: test_mul_v16i32_v16i16:
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
@ -625,9 +625,9 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
|
||||
;
|
||||
; SSE4-64-LABEL: test_mul_v16i32_v16i16:
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
@ -880,7 +880,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SLM32-NEXT: movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
|
||||
; SLM32-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLM32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SLM32-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLM32-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
|
||||
@ -896,7 +896,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SLM64-NEXT: movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
|
||||
; SLM64-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLM64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SLM64-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLM64-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
|
||||
@ -910,7 +910,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SLOW32: # %bb.0:
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SLOW32-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLOW32-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SLOW32-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -926,7 +926,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SLOW64: # %bb.0:
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SLOW64-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLOW64-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SLOW64-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -942,7 +942,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -958,7 +958,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
@ -972,7 +972,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
;
|
||||
; AVX2-32-LABEL: test_mul_v16i32_v16i8_minsize:
|
||||
; AVX2-32: # %bb.0:
|
||||
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
||||
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
@ -982,7 +982,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
|
||||
;
|
||||
; AVX2-64-LABEL: test_mul_v16i32_v16i8_minsize:
|
||||
; AVX2-64: # %bb.0:
|
||||
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
||||
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
|
||||
@ -1077,7 +1077,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
; SLM32-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SLM32: # %bb.0:
|
||||
; SLM32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLM32-NEXT: pmulld %xmm2, %xmm0
|
||||
@ -1087,7 +1087,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
; SLM64-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SLM64: # %bb.0:
|
||||
; SLM64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLM64-NEXT: pmulld %xmm2, %xmm0
|
||||
@ -1096,7 +1096,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
;
|
||||
; SLOW32-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SLOW32: # %bb.0:
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLOW32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -1106,7 +1106,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
;
|
||||
; SLOW64-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SLOW64: # %bb.0:
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLOW64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -1116,7 +1116,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
;
|
||||
; SSE4-32-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE4-32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -1126,7 +1126,7 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
;
|
||||
; SSE4-64-LABEL: test_mul_v8i32_v8i16_minsize:
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE4-64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778]
|
||||
@ -1155,9 +1155,9 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
|
||||
define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
; SLM32-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SLM32: # %bb.0:
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SLM32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLM32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -1171,9 +1171,9 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
;
|
||||
; SLM64-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SLM64: # %bb.0:
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SLM64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLM64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
@ -1187,9 +1187,9 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
;
|
||||
; SLOW32-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SLOW32: # %bb.0:
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SLOW32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLOW32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
@ -1203,9 +1203,9 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
;
|
||||
; SLOW64-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SLOW64: # %bb.0:
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SLOW64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SLOW64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
@ -1219,9 +1219,9 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
;
|
||||
; SSE4-32-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SSE4-32: # %bb.0:
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE4-32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
@ -1235,9 +1235,9 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
|
||||
;
|
||||
; SSE4-64-LABEL: test_mul_v16i32_v16i16_minsize:
|
||||
; SSE4-64: # %bb.0:
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE4-64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
|
@ -206,10 +206,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
|
||||
; X64-NEXT: cmovll %ecx, %edx
|
||||
; X64-NEXT: movd %edx, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %edx
|
||||
; X64-NEXT: movslq %edx, %rdx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %esi
|
||||
; X64-NEXT: movslq %esi, %rsi
|
||||
; X64-NEXT: imulq %rdx, %rsi
|
||||
@ -476,9 +476,9 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
|
||||
; X64-NEXT: imull %edx, %ecx
|
||||
; X64-NEXT: cmovol %edi, %ecx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm2, %edx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm2, %esi
|
||||
; X64-NEXT: movl %esi, %edi
|
||||
; X64-NEXT: imull %edx, %edi
|
||||
|
@ -5,7 +5,7 @@
|
||||
define <4 x i64> @autogen_SD88863() {
|
||||
; CHECK-LABEL: autogen_SD88863:
|
||||
; CHECK: # %bb.0: # %BB
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
|
||||
|
@ -12,9 +12,9 @@ define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
|
||||
; CHECK-NEXT: cltd
|
||||
; CHECK-NEXT: idivl %ecx
|
||||
; CHECK-NEXT: movd %edx, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm5, %eax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm5, %ecx
|
||||
; CHECK-NEXT: cltd
|
||||
; CHECK-NEXT: idivl %ecx
|
||||
@ -41,9 +41,9 @@ define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
|
||||
; CHECK-NEXT: cltd
|
||||
; CHECK-NEXT: idivl %ecx
|
||||
; CHECK-NEXT: movd %edx, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm4, %eax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm4, %ecx
|
||||
; CHECK-NEXT: cltd
|
||||
; CHECK-NEXT: idivl %ecx
|
||||
@ -79,9 +79,9 @@ define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: divl %ecx
|
||||
; CHECK-NEXT: movd %edx, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm5, %eax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm5, %ecx
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: divl %ecx
|
||||
@ -108,9 +108,9 @@ define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: divl %ecx
|
||||
; CHECK-NEXT: movd %edx, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm4, %eax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm4, %ecx
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: divl %ecx
|
||||
|
@ -2791,8 +2791,8 @@ define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind {
|
||||
;
|
||||
; X64-SSE2-LABEL: test_mm_storeh_pi:
|
||||
; X64-SSE2: # %bb.0:
|
||||
; X64-SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x4e]
|
||||
; X64-SSE2-NEXT: # xmm0 = xmm0[2,3,0,1]
|
||||
; X64-SSE2-NEXT: punpckhqdq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x6d,0xc0]
|
||||
; X64-SSE2-NEXT: # xmm0 = xmm0[1,1]
|
||||
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
|
||||
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
|
||||
; X64-SSE2-NEXT: retq # encoding: [0xc3]
|
||||
|
@ -692,24 +692,24 @@ entry:
|
||||
define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
|
||||
; SSE-LABEL: insertps_from_shufflevector_i32_2:
|
||||
; SSE: ## %bb.0: ## %entry
|
||||
; SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
|
||||
; SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
|
||||
; SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: pblendw $12, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc1,0x0c]
|
||||
; SSE-NEXT: ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
|
||||
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
;
|
||||
; AVX1-LABEL: insertps_from_shufflevector_i32_2:
|
||||
; AVX1: ## %bb.0: ## %entry
|
||||
; AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512-LABEL: insertps_from_shufflevector_i32_2:
|
||||
; AVX512: ## %bb.0: ## %entry
|
||||
; AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
@ -1875,8 +1875,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
; X86-SSE-LABEL: insertps_pr20411:
|
||||
; X86-SSE: ## %bb.0:
|
||||
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
|
||||
; X86-SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X86-SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
|
||||
; X86-SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X86-SSE-NEXT: pblendw $243, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc8,0xf3]
|
||||
; X86-SSE-NEXT: ## xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
|
||||
; X86-SSE-NEXT: movdqu %xmm1, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x08]
|
||||
@ -1885,8 +1885,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
; X86-AVX1-LABEL: insertps_pr20411:
|
||||
; X86-AVX1: ## %bb.0:
|
||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; X86-AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X86-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; X86-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X86-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; X86-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; X86-AVX1-NEXT: vmovups %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x11,0x00]
|
||||
@ -1895,8 +1895,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
; X86-AVX512-LABEL: insertps_pr20411:
|
||||
; X86-AVX512: ## %bb.0:
|
||||
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; X86-AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X86-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; X86-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X86-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; X86-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; X86-AVX512-NEXT: vmovups %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
|
||||
@ -1904,8 +1904,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
;
|
||||
; X64-SSE-LABEL: insertps_pr20411:
|
||||
; X64-SSE: ## %bb.0:
|
||||
; X64-SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
|
||||
; X64-SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
|
||||
; X64-SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X64-SSE-NEXT: pblendw $243, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc8,0xf3]
|
||||
; X64-SSE-NEXT: ## xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
|
||||
; X64-SSE-NEXT: movdqu %xmm1, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x0f]
|
||||
@ -1913,8 +1913,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
;
|
||||
; X64-AVX1-LABEL: insertps_pr20411:
|
||||
; X64-AVX1: ## %bb.0:
|
||||
; X64-AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; X64-AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X64-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; X64-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X64-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; X64-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x11,0x07]
|
||||
@ -1922,8 +1922,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
|
||||
;
|
||||
; X64-AVX512-LABEL: insertps_pr20411:
|
||||
; X64-AVX512: ## %bb.0:
|
||||
; X64-AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
|
||||
; X64-AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
|
||||
; X64-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
|
||||
; X64-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
|
||||
; X64-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
|
||||
; X64-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
|
||||
|
@ -73,7 +73,7 @@ define <2 x i32> @test4(<8 x i32> %v) {
|
||||
define <2 x i32> @test5(<8 x i32> %v) {
|
||||
; SSE2-LABEL: test5:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,2,2]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -175,7 +175,7 @@ define <2 x i32> @test9(<8 x i32> %v) {
|
||||
define <2 x i32> @test10(<8 x i32> %v) {
|
||||
; SSE2-LABEL: test10:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,2,2]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -248,9 +248,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm3
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm4, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm4, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
@ -264,9 +264,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm1, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
|
@ -335,9 +335,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm7
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm2, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
@ -369,9 +369,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm3
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm1, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
|
@ -40,7 +40,7 @@ define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
|
||||
; X32-SSE: # %bb.0:
|
||||
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
|
||||
; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; X32-SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
@ -55,7 +55,7 @@ define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
|
||||
; X64-SSE: # %bb.0:
|
||||
; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; X64-SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; X64-SSE-NEXT: retq
|
||||
|
@ -151,9 +151,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: movl $-1, %eax
|
||||
; X64-NEXT: cmoval %eax, %ecx
|
||||
; X64-NEXT: movd %ecx, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %ecx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %edx
|
||||
; X64-NEXT: imulq %rcx, %rdx
|
||||
; X64-NEXT: movq %rdx, %rcx
|
||||
@ -361,9 +361,9 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: cmovol %ecx, %eax
|
||||
; X64-NEXT: movd %eax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; X64-NEXT: movd %xmm3, %edx
|
||||
; X64-NEXT: mull %edx
|
||||
; X64-NEXT: cmovol %ecx, %eax
|
||||
|
@ -2411,7 +2411,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,1]
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,1]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
@ -2516,7 +2516,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,1]
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [14,4294967295,16,1]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
@ -16,7 +16,7 @@ define <2 x i64> @var_shuffle_v2i64(<2 x i64> %v, <2 x i64> %indices) nounwind {
; SSE3: # %bb.0:
; SSE3-NEXT: movq %xmm1, %rax
; SSE3-NEXT: andl $1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE3-NEXT: movq %xmm1, %rcx
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@ -29,7 +29,7 @@ define <2 x i64> @var_shuffle_v2i64(<2 x i64> %v, <2 x i64> %indices) nounwind {
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq %xmm1, %rax
; SSSE3-NEXT: andl $1, %eax
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movq %xmm1, %rcx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@ -69,7 +69,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; SSE3-NEXT: movd %xmm2, %ecx
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm1, %esi
@ -379,7 +379,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun
; SSE3: # %bb.0:
; SSE3-NEXT: movq %xmm1, %rax
; SSE3-NEXT: andl $1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE3-NEXT: movq %xmm1, %rcx
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@ -391,7 +391,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq %xmm1, %rax
; SSSE3-NEXT: andl $1, %eax
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movq %xmm1, %rcx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@ -430,7 +430,7 @@ define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwi
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; SSE3-NEXT: movd %xmm2, %ecx
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm1, %esi
@ -135,7 +135,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstps (%esp)
@ -154,7 +154,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2ss %rax, %xmm0
@ -172,7 +172,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstps (%esp)
@ -191,7 +191,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm0
@ -209,7 +209,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps (%esp)
@ -236,7 +236,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@ -271,7 +271,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@ -313,7 +313,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: addss %xmm0, %xmm0
; SSE-64-NEXT: .LBB3_2:
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT: movq %xmm1, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
@ -340,7 +340,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@ -382,7 +382,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-64-NEXT: # %bb.1:
; SSE41-64-NEXT: addss %xmm0, %xmm0
; SSE41-64-NEXT: .LBB3_2:
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT: movq %xmm1, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
@ -410,7 +410,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
@ -471,7 +471,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@ -1146,7 +1146,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
|
||||
; SSE-32-NEXT: andl $-8, %esp
|
||||
; SSE-32-NEXT: subl $32, %esp
|
||||
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
@ -1164,7 +1164,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
|
||||
; SSE-64: # %bb.0:
|
||||
; SSE-64-NEXT: movq %xmm0, %rax
|
||||
; SSE-64-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE-64-NEXT: movq %xmm0, %rax
|
||||
; SSE-64-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-64-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -1182,7 +1182,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
|
||||
; SSE41-32-NEXT: andl $-8, %esp
|
||||
; SSE41-32-NEXT: subl $32, %esp
|
||||
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
@ -1200,7 +1200,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
|
||||
; SSE41-64: # %bb.0:
|
||||
; SSE41-64-NEXT: movq %xmm0, %rax
|
||||
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-64-NEXT: movq %xmm0, %rax
|
||||
; SSE41-64-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -1218,7 +1218,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
|
||||
; AVX-32-NEXT: andl $-8, %esp
|
||||
; AVX-32-NEXT: subl $32, %esp
|
||||
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
|
@ -281,7 +281,7 @@ define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 {
|
||||
; AVX1-LABEL: sitofp_v8i16_v8f32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
|
@ -18,7 +18,7 @@ define <8 x float> @cvt_v8i16_v8f32(<8 x i16> %src) {
|
||||
; CHECK-LABEL: cvt_v8i16_v8f32:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
|
@ -95,7 +95,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -305,14 +305,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -421,7 +421,7 @@ define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
|
||||
; SSE-LABEL: sitofp_4i32_to_4f64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
@ -440,7 +440,7 @@ define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -448,7 +448,7 @@ define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -468,7 +468,7 @@ define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -476,7 +476,7 @@ define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -504,7 +504,7 @@ define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -512,7 +512,7 @@ define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -533,7 +533,7 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -541,7 +541,7 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1015,7 +1015,7 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
|
||||
; SSE41-NEXT: por %xmm3, %xmm2
|
||||
; SSE41-NEXT: subpd %xmm3, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: por %xmm3, %xmm1
|
||||
; SSE41-NEXT: subpd %xmm3, %xmm1
|
||||
@ -1074,7 +1074,7 @@ define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1083,7 +1083,7 @@ define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1103,7 +1103,7 @@ define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1112,7 +1112,7 @@ define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1140,7 +1140,7 @@ define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1149,7 +1149,7 @@ define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1170,7 +1170,7 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1179,7 +1179,7 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1209,7 +1209,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -1274,7 +1274,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
|
||||
define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
|
||||
; SSE2-LABEL: sitofp_2i64_to_4f32_zero:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -1345,7 +1345,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -1464,7 +1464,7 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
|
||||
; AVX1-LABEL: sitofp_8i16_to_4f32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
@ -1568,7 +1568,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -1576,7 +1576,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -1719,7 +1719,7 @@ define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
@ -1728,7 +1728,7 @@ define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
|
||||
; AVX1-LABEL: sitofp_8i16_to_8f32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
@ -1868,7 +1868,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE2-NEXT: addss %xmm0, %xmm0
|
||||
; SSE2-NEXT: .LBB41_3:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB41_4
|
||||
@ -1969,7 +1969,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
|
||||
define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
|
||||
; SSE2-LABEL: uitofp_2i64_to_2f32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB42_1
|
||||
@ -2102,7 +2102,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB43_3:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB43_4
|
||||
@ -2462,7 +2462,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; SSE2-NEXT: addss %xmm2, %xmm2
|
||||
; SSE2-NEXT: .LBB49_3:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB49_4
|
||||
@ -2494,7 +2494,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB49_9:
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB49_10
|
||||
@ -2769,7 +2769,7 @@ define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
@ -2906,7 +2906,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
|
||||
@ -3103,7 +3103,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: movdqa 16(%rdi), %xmm2
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
|
||||
@ -3111,7 +3111,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
|
||||
@ -3209,7 +3209,7 @@ define <4 x double> @sitofp_load_4i32_to_4f64(<4 x i32> *%a) {
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -3229,7 +3229,7 @@ define <4 x double> @sitofp_load_4i16_to_4f64(<4 x i16> *%a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3237,7 +3237,7 @@ define <4 x double> @sitofp_load_4i16_to_4f64(<4 x i16> *%a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxwd (%rdi), %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3259,7 +3259,7 @@ define <4 x double> @sitofp_load_4i8_to_4f64(<4 x i8> *%a) {
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3267,7 +3267,7 @@ define <4 x double> @sitofp_load_4i8_to_4f64(<4 x i8> *%a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovsxbd (%rdi), %xmm1
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3770,7 +3770,7 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
|
||||
; SSE41-NEXT: por %xmm2, %xmm0
|
||||
; SSE41-NEXT: subpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: por %xmm2, %xmm1
|
||||
; SSE41-NEXT: subpd %xmm2, %xmm1
|
||||
@ -3831,7 +3831,7 @@ define <4 x double> @uitofp_load_4i16_to_4f64(<4 x i16> *%a) {
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3839,7 +3839,7 @@ define <4 x double> @uitofp_load_4i16_to_4f64(<4 x i16> *%a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3861,7 +3861,7 @@ define <4 x double> @uitofp_load_4i8_to_4f64(<4 x i8> *%a) {
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3869,7 +3869,7 @@ define <4 x double> @uitofp_load_4i8_to_4f64(<4 x i8> *%a) {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3894,7 +3894,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -3902,7 +3902,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -4073,7 +4073,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: movdqa 48(%rdi), %xmm3
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -4081,7 +4081,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -4090,7 +4090,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: movq %xmm3, %rax
|
||||
; SSE2-NEXT: xorps %xmm4, %xmm4
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -4098,7 +4098,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
@ -4378,7 +4378,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB83_3:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB83_4
|
||||
@ -4410,7 +4410,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: addss %xmm0, %xmm0
|
||||
; SSE2-NEXT: .LBB83_9:
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB83_10
|
||||
@ -4729,7 +4729,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
|
||||
; SSE2-NEXT: addss %xmm3, %xmm3
|
||||
; SSE2-NEXT: .LBB87_3:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_4
|
||||
@ -4760,7 +4760,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE2-NEXT: addss %xmm0, %xmm0
|
||||
; SSE2-NEXT: .LBB87_9:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm5, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_10
|
||||
@ -4791,7 +4791,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm5
|
||||
; SSE2-NEXT: addss %xmm5, %xmm5
|
||||
; SSE2-NEXT: .LBB87_15:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_16
|
||||
@ -4826,7 +4826,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: .LBB87_21:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_22
|
||||
|
@ -107,7 +107,7 @@ define <3 x i32> @saddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -119,7 +119,7 @@ define <3 x i32> @saddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -512,13 +512,13 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm0, %xmm5, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm4, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm5, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -644,7 +644,7 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -667,9 +667,9 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm3, (%rdi)
|
||||
@ -683,7 +683,7 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm3, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -769,7 +769,7 @@ define <8 x i32> @saddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
@ -889,7 +889,7 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %ecx
|
||||
; SSE2-NEXT: movw %cx, 9(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %edx
|
||||
; SSE2-NEXT: movw %dx, 6(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
@ -924,7 +924,7 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 9(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %edx
|
||||
; SSSE3-NEXT: movw %dx, 6(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
|
@ -1193,13 +1193,13 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm6, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm0, %xmm9, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm5, %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm5, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpackssdw %xmm5, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm5, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -1376,11 +1376,11 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm6
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm4
|
||||
@ -1407,7 +1407,7 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -1436,9 +1436,9 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
@ -1450,9 +1450,9 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
@ -1478,7 +1478,7 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm3, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -1730,11 +1730,11 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: movq %rdi, %rax
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm3, %xmm7
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm9 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmullw %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255]
|
||||
@ -1754,12 +1754,12 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pcmpeqb %xmm1, %xmm4
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm2, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm7
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmullw %xmm7, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm10, %xmm1
|
||||
@ -1783,7 +1783,7 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm8
|
||||
; SSE41-NEXT: psrad $31, %xmm8
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm5
|
||||
; SSE41-NEXT: psrad $31, %xmm5
|
||||
@ -1795,7 +1795,7 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm7
|
||||
; SSE41-NEXT: psrad $31, %xmm7
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm3
|
||||
; SSE41-NEXT: psrad $31, %xmm3
|
||||
@ -1841,9 +1841,9 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm5, %xmm4
|
||||
; AVX1-NEXT: vpmullw %xmm7, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm5, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
@ -1865,9 +1865,9 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm7
|
||||
; AVX1-NEXT: vpmullw %xmm6, %xmm7, %xmm6
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
@ -1882,14 +1882,14 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
|
||||
; AVX1-NEXT: vmovdqa %xmm9, 16(%rdi)
|
||||
@ -1929,9 +1929,9 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
||||
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm2
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
|
||||
; AVX2-NEXT: vmovdqa %ymm4, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -2411,8 +2411,8 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pand %xmm10, %xmm9
|
||||
; SSE41-NEXT: packuswb %xmm11, %xmm9
|
||||
; SSE41-NEXT: pmovsxbw %xmm3, %xmm8
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm7[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm7[2,3,2,3]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmullw %xmm12, %xmm8
|
||||
; SSE41-NEXT: pxor %xmm7, %xmm7
|
||||
; SSE41-NEXT: pcmpgtb %xmm9, %xmm7
|
||||
@ -2436,8 +2436,8 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pand %xmm10, %xmm12
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm12
|
||||
; SSE41-NEXT: pmovsxbw %xmm2, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmullw %xmm7, %xmm3
|
||||
; SSE41-NEXT: pxor %xmm7, %xmm7
|
||||
; SSE41-NEXT: pcmpgtb %xmm12, %xmm7
|
||||
@ -2461,8 +2461,8 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pand %xmm10, %xmm11
|
||||
; SSE41-NEXT: packuswb %xmm7, %xmm11
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmullw %xmm6, %xmm2
|
||||
; SSE41-NEXT: pxor %xmm6, %xmm6
|
||||
; SSE41-NEXT: pcmpgtb %xmm11, %xmm6
|
||||
@ -2487,8 +2487,8 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: packuswb %xmm6, %xmm5
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmullw %xmm7, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxbw %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm4, %xmm0
|
||||
@ -2523,7 +2523,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, (%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2538,7 +2538,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 208(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2553,7 +2553,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 144(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2568,7 +2568,7 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 80(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2606,9 +2606,9 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm6, %xmm7
|
||||
; AVX1-NEXT: vpmullw %xmm4, %xmm7, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpmullw %xmm5, %xmm6, %xmm5
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
|
||||
@ -2628,9 +2628,9 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm7
|
||||
; AVX1-NEXT: vpmullw %xmm6, %xmm7, %xmm6
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
@ -2652,9 +2652,9 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmullw %xmm5, %xmm7, %xmm5
|
||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm11, %xmm7
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpmullw %xmm4, %xmm6, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
@ -2672,9 +2672,9 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm5
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpmullw %xmm5, %xmm6, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm2
|
||||
@ -2699,37 +2699,37 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 64(%rdi)
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 224(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 240(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 208(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 160(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 176(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 144(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 96(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 112(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 80(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 32(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 48(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
@ -2794,15 +2794,15 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpackuswb %ymm6, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %ymm7, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm2, %ymm8
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm6, %ymm6
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm7, %ymm7
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm5, %ymm5
|
||||
@ -2932,7 +2932,7 @@ define <8 x i32> @smulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
@ -2971,9 +2971,9 @@ define <8 x i32> @smulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) nounwind {
|
||||
; SSE2-LABEL: smulo_v2i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %r8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rcx
|
||||
; SSE2-NEXT: movq %xmm1, %rdx
|
||||
; SSE2-NEXT: movq %xmm0, %rsi
|
||||
@ -2996,9 +2996,9 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
||||
;
|
||||
; SSSE3-LABEL: smulo_v2i64:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSSE3-NEXT: movq %xmm2, %r8
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSSE3-NEXT: movq %xmm2, %rcx
|
||||
; SSSE3-NEXT: movq %xmm1, %rdx
|
||||
; SSSE3-NEXT: movq %xmm0, %rsi
|
||||
@ -3158,7 +3158,7 @@ define <4 x i32> @smulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: por %xmm3, %xmm1
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
; SSE2-NEXT: movw %cx, 6(%rdi)
|
||||
; SSE2-NEXT: movd %xmm2, %edx
|
||||
@ -3213,7 +3213,7 @@ define <4 x i32> @smulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: por %xmm3, %xmm1
|
||||
; SSSE3-NEXT: movd %xmm0, %eax
|
||||
; SSSE3-NEXT: movw %ax, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm0, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 6(%rdi)
|
||||
; SSSE3-NEXT: movd %xmm2, %edx
|
||||
|
@ -109,7 +109,7 @@ define <3 x i32> @ssubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE2-NEXT: movq %xmm3, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -122,7 +122,7 @@ define <3 x i32> @ssubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movq %xmm3, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -517,13 +517,13 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm0, %xmm5, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm4, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm5, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -649,7 +649,7 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -672,9 +672,9 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm3, (%rdi)
|
||||
@ -688,7 +688,7 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm3, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -774,7 +774,7 @@ define <8 x i32> @ssubo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
@ -899,7 +899,7 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %ecx
|
||||
; SSE2-NEXT: movw %cx, 9(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %edx
|
||||
; SSE2-NEXT: movw %dx, 6(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
@ -934,7 +934,7 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 9(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %edx
|
||||
; SSSE3-NEXT: movw %dx, 6(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
|
@ -120,7 +120,7 @@ define <3 x i32> @uaddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -132,7 +132,7 @@ define <3 x i32> @uaddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -601,13 +601,13 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpmaxud %xmm0, %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm6, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm7
|
||||
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -727,7 +727,7 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm4
|
||||
; SSE41-NEXT: psrad $31, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -750,9 +750,9 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
@ -766,7 +766,7 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -850,7 +850,7 @@ define <8 x i32> @uaddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
@ -956,7 +956,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %ecx
|
||||
; SSE2-NEXT: movw %cx, 9(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %edx
|
||||
; SSE2-NEXT: movw %dx, 6(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
@ -988,7 +988,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 9(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %edx
|
||||
; SSSE3-NEXT: movw %dx, 6(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
|
@ -1008,16 +1008,16 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm8, %xmm5
|
||||
; AVX1-NEXT: vpxor %xmm5, %xmm9, %xmm5
|
||||
; AVX1-NEXT: vpackssdw %xmm13, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpacksswb %xmm11, %xmm5, %xmm7
|
||||
; AVX1-NEXT: vpacksswb %xmm11, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmulld %xmm6, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm3
|
||||
; AVX1-NEXT: vpmulld %xmm10, %xmm12, %xmm6
|
||||
; AVX1-NEXT: vpmovsxbd %xmm7, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm7[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm5, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm5[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm5, %xmm11, %xmm1
|
||||
; AVX1-NEXT: vpacksswb %xmm11, %xmm11, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -1217,7 +1217,7 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -1254,9 +1254,9 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
@ -1278,7 +1278,7 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -1560,7 +1560,7 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
@ -1572,7 +1572,7 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm3
|
||||
; SSE41-NEXT: psrad $31, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm4
|
||||
; SSE41-NEXT: psrad $31, %xmm4
|
||||
@ -1647,14 +1647,14 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
|
||||
; AVX1-NEXT: vmovdqa %xmm5, 16(%rdi)
|
||||
@ -1689,9 +1689,9 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
||||
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm2
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
|
||||
; AVX2-NEXT: vmovdqa %ymm4, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -2230,7 +2230,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, (%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm14[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm14[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2245,7 +2245,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 208(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm12[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm12[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2260,7 +2260,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 144(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2275,7 +2275,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, 80(%rdi)
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
@ -2390,37 +2390,37 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vmovdqa %xmm4, 64(%rdi)
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
|
||||
; AVX1-NEXT: vmovdqa %xmm4, (%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vmovdqa %xmm4, 224(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
|
||||
; AVX1-NEXT: vmovdqa %xmm4, 240(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vmovdqa %xmm3, 208(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vmovdqa %xmm3, 160(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vmovdqa %xmm3, 176(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa %xmm2, 144(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa %xmm2, 96(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa %xmm2, 112(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 80(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 32(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa %xmm1, 48(%rdi)
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
@ -2475,15 +2475,15 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpackuswb %ymm7, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm6, %ymm6
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm7, %ymm7
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm5, %ymm5
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
|
||||
@ -2608,7 +2608,7 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
@ -2646,9 +2646,9 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) nounwind {
|
||||
; SSE2-LABEL: umulo_v2i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %r8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %r10
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: movq %xmm1, %rdx
|
||||
@ -2672,9 +2672,9 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
||||
;
|
||||
; SSSE3-LABEL: umulo_v2i64:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
|
||||
; SSSE3-NEXT: movq %xmm2, %r8
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; SSSE3-NEXT: movq %xmm2, %r10
|
||||
; SSSE3-NEXT: movq %xmm0, %rax
|
||||
; SSSE3-NEXT: movq %xmm1, %rdx
|
||||
@ -2829,7 +2829,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: por %xmm3, %xmm0
|
||||
; SSE2-NEXT: movd %xmm2, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm2, %ecx
|
||||
; SSE2-NEXT: movw %cx, 6(%rdi)
|
||||
; SSE2-NEXT: movd %xmm1, %edx
|
||||
@ -2873,7 +2873,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: por %xmm3, %xmm0
|
||||
; SSSE3-NEXT: movd %xmm2, %eax
|
||||
; SSSE3-NEXT: movw %ax, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 6(%rdi)
|
||||
; SSSE3-NEXT: movd %xmm1, %edx
|
||||
|
@ -126,7 +126,7 @@ define <3 x i32> @usubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movq %xmm0, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -140,7 +140,7 @@ define <3 x i32> @usubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm2
|
||||
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
|
||||
; SSSE3-NEXT: movq %xmm0, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm0, 8(%rdi)
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
@ -644,13 +644,13 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX1-NEXT: vpminud %xmm0, %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm6, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm7
|
||||
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm6
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
@ -771,7 +771,7 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm1
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
|
||||
; SSE41-NEXT: pslld $31, %xmm2
|
||||
; SSE41-NEXT: psrad $31, %xmm2
|
||||
@ -793,9 +793,9 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
@ -809,7 +809,7 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
@ -895,7 +895,7 @@ define <8 x i32> @usubo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
@ -1003,7 +1003,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %ecx
|
||||
; SSE2-NEXT: movw %cx, 9(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm1, %edx
|
||||
; SSE2-NEXT: movw %dx, 6(%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
@ -1035,7 +1035,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %ecx
|
||||
; SSSE3-NEXT: movw %cx, 9(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %edx
|
||||
; SSSE3-NEXT: movw %dx, 6(%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
|
||||
|
@ -6296,7 +6296,7 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -6342,7 +6342,7 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -6375,7 +6375,7 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
|
||||
; CHECK-NEXT: movd %xmm1, %eax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
|
||||
@ -6414,7 +6414,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
|
||||
; CHECK-NEXT: xorps %xmm2, %xmm2
|
||||
; CHECK-NEXT: cvtsi2ss %eax, %xmm2
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ss %eax, %xmm0
|
||||
@ -6535,7 +6535,7 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
|
||||
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -6575,14 +6575,14 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
||||
; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm3
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -6642,7 +6642,7 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
||||
@ -6650,7 +6650,7 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -6970,7 +6970,7 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
|
||||
; CHECK-NEXT: # %bb.1:
|
||||
; CHECK-NEXT: addss %xmm0, %xmm0
|
||||
; CHECK-NEXT: .LBB174_2: # %entry
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: movq %rax, %rcx
|
||||
; CHECK-NEXT: shrq %rcx
|
||||
@ -7031,7 +7031,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
|
||||
; CHECK-NEXT: movd %xmm1, %eax
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
|
||||
@ -7082,7 +7082,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
|
||||
; CHECK-NEXT: xorps %xmm2, %xmm2
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
|
||||
@ -7157,7 +7157,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
|
||||
; AVX1-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
|
||||
; AVX1-NEXT: vaddpd %xmm2, %xmm4, %xmm2
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; AVX1-NEXT: vsubpd %xmm3, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm5 = xmm4[1,0]
|
||||
@ -7458,7 +7458,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
||||
; CHECK-NEXT: # %bb.1:
|
||||
; CHECK-NEXT: addss %xmm2, %xmm2
|
||||
; CHECK-NEXT: .LBB182_2: # %entry
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: movq %rax, %rcx
|
||||
; CHECK-NEXT: shrq %rcx
|
||||
@ -7487,7 +7487,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
||||
; CHECK-NEXT: addss %xmm1, %xmm1
|
||||
; CHECK-NEXT: .LBB182_6: # %entry
|
||||
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: movq %rax, %rcx
|
||||
; CHECK-NEXT: shrq %rcx
|
||||
|
@ -30,7 +30,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE2-NEXT: psllq %xmm2, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE2-NEXT: psllq %xmm4, %xmm5
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
|
||||
@ -38,7 +38,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; SSE2-NEXT: psubq %xmm2, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE2-NEXT: psrlq %xmm3, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: psrlq %xmm3, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
|
||||
; SSE2-NEXT: orpd %xmm5, %xmm1
|
||||
@ -56,7 +56,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: psllq %xmm2, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSE41-NEXT: psllq %xmm5, %xmm4
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm0[0,1,2,3],xmm4[4,5,6,7]
|
||||
@ -64,7 +64,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; SSE41-NEXT: psubq %xmm2, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm5
|
||||
; SSE41-NEXT: psrlq %xmm0, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: psrlq %xmm0, %xmm1
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
|
||||
; SSE41-NEXT: por %xmm1, %xmm4
|
||||
@ -78,13 +78,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64]
|
||||
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
|
||||
; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
|
||||
@ -212,7 +212,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; X32-SSE-NEXT: psllq %xmm2, %xmm3
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; X32-SSE-NEXT: psllq %xmm4, %xmm5
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
|
||||
@ -220,7 +220,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
|
||||
; X32-SSE-NEXT: psubq %xmm2, %xmm3
|
||||
; X32-SSE-NEXT: movdqa %xmm1, %xmm4
|
||||
; X32-SSE-NEXT: psrlq %xmm3, %xmm4
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
|
||||
; X32-SSE-NEXT: orpd %xmm5, %xmm1
|
||||
@ -249,7 +249,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE2-NEXT: psrld %xmm6, %xmm3
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm6
|
||||
; SSE2-NEXT: psrld %xmm5, %xmm6
|
||||
@ -285,7 +285,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm0[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm5
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pshuflw {{.*#+}} xmm6 = xmm4[2,3,3,3,4,5,6,7]
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm7
|
||||
; SSE41-NEXT: psrld %xmm6, %xmm7
|
||||
@ -465,7 +465,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
|
||||
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
|
||||
; X32-SSE-NEXT: psrld %xmm6, %xmm3
|
||||
; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
|
||||
; X32-SSE-NEXT: movdqa %xmm1, %xmm6
|
||||
; X32-SSE-NEXT: psrld %xmm5, %xmm6
|
||||
@ -1366,7 +1366,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
|
||||
; X32-SSE-NEXT: psubq %xmm3, %xmm4
|
||||
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
|
||||
; X32-SSE-NEXT: psrlq %xmm4, %xmm3
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; X32-SSE-NEXT: psrlq %xmm4, %xmm1
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
|
@ -26,11 +26,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; AVX1-NEXT: vpsllq %xmm4, %xmm3, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm6, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
|
||||
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm6
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
|
||||
@ -38,12 +38,12 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
|
||||
; AVX1-NEXT: vpsubq %xmm4, %xmm8, %xmm6
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
|
||||
; AVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm6
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
|
||||
; AVX1-NEXT: vpsubq %xmm2, %xmm8, %xmm6
|
||||
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm7
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm7[0,1,2,3],xmm1[4,5,6,7]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
|
||||
|
@ -31,14 +31,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllq %xmm1, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE2-NEXT: psllq %xmm1, %xmm5
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE2-NEXT: psrlq %xmm2, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
@ -52,14 +52,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psllq %xmm1, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: psllq %xmm1, %xmm5
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlq %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: psrlq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
@ -70,14 +70,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
@ -136,14 +136,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psllq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@ -745,14 +745,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psllq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@ -24,11 +24,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
@ -38,13 +38,13 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -520,7 +520,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm4, %xmm7
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2
@ -30,7 +30,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrlq %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrlq %xmm4, %xmm5
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
@ -38,7 +38,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE2-NEXT: psubq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psllq %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; SSE2-NEXT: psllq %xmm3, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
@ -58,7 +58,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlq %xmm2, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: psrlq %xmm4, %xmm5
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm0[0,1,2,3],xmm5[4,5,6,7]
@ -66,7 +66,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: psubq %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psllq %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: psllq %xmm0, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm3
@ -80,13 +80,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64]
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
@ -215,7 +215,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrlq %xmm2, %xmm3
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm1, %xmm5
; X32-SSE-NEXT: psrlq %xmm4, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
@ -223,7 +223,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; X32-SSE-NEXT: psubq %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm3, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm3, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@ -251,7 +251,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrld %xmm5, %xmm3
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: psrld %xmm5, %xmm6
@ -287,7 +287,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: psrld %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm5 = xmm0[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: psrld %xmm5, %xmm6
@ -469,7 +469,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrld %xmm5, %xmm3
; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE-NEXT: movdqa %xmm1, %xmm6
; X32-SSE-NEXT: psrld %xmm5, %xmm6
@ -1380,7 +1380,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; X32-SSE-NEXT: psubq %xmm4, %xmm5
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm5, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm5, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X32-SSE-NEXT: movdqa %xmm1, %xmm4
@ -26,11 +26,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
@ -38,12 +38,12 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX1-NEXT: vpsubq %xmm4, %xmm8, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
; AVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm2, %xmm8, %xmm6
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm7[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
@ -31,14 +31,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlq %xmm1, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE2-NEXT: psrlq %xmm1, %xmm5
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: psllq %xmm3, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: psllq %xmm2, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: orpd %xmm5, %xmm0
|
||||
@ -52,14 +52,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
|
||||
; SSE41-NEXT: pand %xmm2, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE41-NEXT: psrlq %xmm1, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE41-NEXT: psrlq %xmm1, %xmm5
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
|
||||
; SSE41-NEXT: pand %xmm2, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psllq %xmm3, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; SSE41-NEXT: psllq %xmm2, %xmm0
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
|
||||
; SSE41-NEXT: por %xmm5, %xmm0
|
||||
@ -70,14 +70,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
|
||||
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
|
||||
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
|
||||
@ -138,14 +138,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
|
||||
; X32-SSE-NEXT: pand %xmm2, %xmm1
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm4
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm5
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
|
||||
; X32-SSE-NEXT: pand %xmm2, %xmm3
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE-NEXT: psllq %xmm3, %xmm1
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; X32-SSE-NEXT: psllq %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; X32-SSE-NEXT: orpd %xmm5, %xmm0
|
||||
@ -789,14 +789,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
|
||||
; X32-SSE-NEXT: pand %xmm2, %xmm1
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm4
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm5
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
|
||||
; X32-SSE-NEXT: pand %xmm2, %xmm3
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE-NEXT: psllq %xmm3, %xmm1
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
|
||||
; X32-SSE-NEXT: psllq %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; X32-SSE-NEXT: orpd %xmm5, %xmm0
|
||||
|
@ -24,11 +24,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
|
||||
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
|
||||
@ -38,13 +38,13 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
|
||||
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm7
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
|
||||
; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
|
||||
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
@ -566,7 +566,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
|
||||
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||
; AVX1-NEXT: vpsllq %xmm2, %xmm4, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpsllq %xmm6, %xmm4, %xmm7
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
|
||||
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm2
|
||||
|
@ -20,7 +20,7 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
|
||||
; SSE2-NEXT: sarq %rdx
|
||||
; SSE2-NEXT: addq %rax, %rdx
|
||||
; SSE2-NEXT: movq %rdx, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: imulq %rcx
|
||||
; SSE2-NEXT: movq %rdx, %rax
|
||||
@ -199,7 +199,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxbw %xmm3, %xmm3
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $8, %xmm3
|
||||
@ -223,7 +223,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
@ -327,7 +327,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
|
||||
@ -371,7 +371,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
@ -457,7 +457,7 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
|
||||
; SSE2-NEXT: subq %rax, %rdx
|
||||
; SSE2-NEXT: addq %rcx, %rdx
|
||||
; SSE2-NEXT: movq %rdx, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: movq %rcx, %rax
|
||||
; SSE2-NEXT: imulq %rsi
|
||||
@ -674,7 +674,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxbw %xmm3, %xmm3
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $8, %xmm3
|
||||
@ -702,7 +702,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
@ -836,7 +836,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; SSE41-NEXT: pmovsxbw %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm4
|
||||
; SSE41-NEXT: psrlw $8, %xmm4
|
||||
@ -879,7 +879,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
|
@ -163,7 +163,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
@ -182,7 +182,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm7, %xmm7
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm7, %xmm3
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
@ -260,7 +260,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
@ -289,7 +289,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
@ -574,7 +574,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427]
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
@ -598,7 +598,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
@ -688,7 +688,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
|
||||
@ -725,7 +725,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpmovsxbw %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
|
||||
|
@ -20,7 +20,7 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: addq %rdx, %rcx
|
||||
; SSE2-NEXT: movq %rcx, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: movq %rcx, %rax
|
||||
; SSE2-NEXT: mulq %rsi
|
||||
@ -445,7 +445,7 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
|
||||
; SSE2-NEXT: subq %rdx, %rax
|
||||
; SSE2-NEXT: addq %rcx, %rax
|
||||
; SSE2-NEXT: movq %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rcx
|
||||
; SSE2-NEXT: movq %rcx, %rax
|
||||
; SSE2-NEXT: mulq %rsi
|
||||
|
@ -452,7 +452,7 @@ define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
|
||||
; SSE42: # %bb.0:
|
||||
; SSE42-NEXT: psrlw $15, %xmm0
|
||||
; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE42-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE42-NEXT: retq
|
||||
|
@ -14,21 +14,21 @@
|
||||
define i64 @test_v2i64(<2 x i64> %a0) {
|
||||
; SSE-LABEL: test_v2i64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm0, %xmm1
|
||||
; SSE-NEXT: movq %xmm1, %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v2i64:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, %rax
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: retq
|
||||
@ -40,7 +40,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
|
||||
; SSE-LABEL: test_v4i64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: paddq %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm0, %xmm1
|
||||
; SSE-NEXT: movq %xmm1, %rax
|
||||
; SSE-NEXT: retq
|
||||
@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -84,7 +84,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
|
||||
; SSE-NEXT: paddq %xmm3, %xmm1
|
||||
; SSE-NEXT: paddq %xmm2, %xmm1
|
||||
; SSE-NEXT: paddq %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm1, %xmm0
|
||||
; SSE-NEXT: movq %xmm0, %rax
|
||||
; SSE-NEXT: retq
|
||||
@ -96,7 +96,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
|
||||
; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -107,7 +107,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -119,7 +119,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
|
||||
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -138,7 +138,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
|
||||
; SSE-NEXT: paddq %xmm4, %xmm2
|
||||
; SSE-NEXT: paddq %xmm3, %xmm2
|
||||
; SSE-NEXT: paddq %xmm0, %xmm2
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE-NEXT: paddq %xmm2, %xmm0
|
||||
; SSE-NEXT: movq %xmm0, %rax
|
||||
; SSE-NEXT: retq
|
||||
@ -156,7 +156,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
|
||||
; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -169,7 +169,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -182,7 +182,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
|
||||
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -236,7 +236,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
|
||||
define i32 @test_v4i32(<4 x i32> %a0) {
|
||||
; SSE-LABEL: test_v4i32:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE-NEXT: paddd %xmm1, %xmm0
|
||||
@ -245,7 +245,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; AVX1-SLOW-LABEL: test_v4i32:
|
||||
; AVX1-SLOW: # %bb.0:
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -261,7 +261,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; AVX2-LABEL: test_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -270,7 +270,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
|
||||
;
|
||||
; AVX512-LABEL: test_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -284,7 +284,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
|
||||
; SSE-LABEL: test_v8i32:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE-NEXT: paddd %xmm1, %xmm0
|
||||
@ -295,7 +295,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
|
||||
; AVX1-SLOW: # %bb.0:
|
||||
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -317,7 +317,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -329,7 +329,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -346,7 +346,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
|
||||
; SSE-NEXT: paddd %xmm3, %xmm1
|
||||
; SSE-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
@ -360,7 +360,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm3, %xmm2
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -375,7 +375,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm2, %xmm3, %xmm2
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vmovd %xmm0, %eax
|
||||
@ -387,7 +387,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -401,7 +401,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -422,7 +422,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
|
||||
; SSE-NEXT: paddd %xmm4, %xmm2
|
||||
; SSE-NEXT: paddd %xmm3, %xmm2
|
||||
; SSE-NEXT: paddd %xmm0, %xmm2
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
@ -442,7 +442,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm4, %xmm2, %xmm2
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm2, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -463,7 +463,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm4, %xmm2, %xmm2
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm2, %xmm1
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vmovd %xmm0, %eax
|
||||
@ -477,7 +477,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -492,7 +492,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
@ -608,7 +608,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
|
||||
define i16 @test_v8i16(<8 x i16> %a0) {
|
||||
; SSE-LABEL: test_v8i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddw %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE-NEXT: paddw %xmm1, %xmm0
|
||||
@ -621,7 +621,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; AVX1-SLOW-LABEL: test_v8i16:
|
||||
; AVX1-SLOW: # %bb.0:
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -642,7 +642,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; AVX2-LABEL: test_v8i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -654,7 +654,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
|
||||
;
|
||||
; AVX512-LABEL: test_v8i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -671,7 +671,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||
; SSE-LABEL: test_v16i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: paddw %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; SSE-NEXT: paddw %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE-NEXT: paddw %xmm1, %xmm0
|
||||
@ -686,7 +686,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||
; AVX1-SLOW: # %bb.0:
|
||||
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -713,7 +713,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -728,7 +728,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -748,7 +748,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||
; SSE-NEXT: paddw %xmm3, %xmm1
|
||||
; SSE-NEXT: paddw %xmm2, %xmm1
|
||||
; SSE-NEXT: paddw %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: paddw %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: paddw %xmm0, %xmm1
|
||||
@ -766,7 +766,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -784,7 +784,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -799,7 +799,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -816,7 +816,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -840,7 +840,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||
; SSE-NEXT: paddw %xmm4, %xmm2
|
||||
; SSE-NEXT: paddw %xmm3, %xmm2
|
||||
; SSE-NEXT: paddw %xmm0, %xmm2
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; SSE-NEXT: paddw %xmm2, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE-NEXT: paddw %xmm0, %xmm1
|
||||
@ -864,7 +864,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm4, %xmm2, %xmm2
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm2, %xmm1
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-SLOW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -888,7 +888,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm4, %xmm2, %xmm2
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm2, %xmm1
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -905,7 +905,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
@ -923,7 +923,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
@ -1142,7 +1142,7 @@ define i8 @test_v8i8_load(<8 x i8>* %p) {
define i8 @test_v16i8(<16 x i8> %a0) {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: psadbw %xmm1, %xmm0
@ -1152,7 +1152,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1162,7 +1162,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1177,7 +1177,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE-LABEL: test_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: paddb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: psadbw %xmm1, %xmm0
@ -1189,7 +1189,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1202,7 +1202,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1215,7 +1215,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1233,7 +1233,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE-NEXT: paddb %xmm3, %xmm1
; SSE-NEXT: paddb %xmm2, %xmm1
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: paddb %xmm1, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: psadbw %xmm0, %xmm1
@ -1248,7 +1248,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1262,7 +1262,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1277,7 +1277,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1299,7 +1299,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE-NEXT: paddb %xmm4, %xmm2
; SSE-NEXT: paddb %xmm3, %xmm2
; SSE-NEXT: paddb %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT: paddb %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: psadbw %xmm0, %xmm1
@ -1320,7 +1320,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1336,7 +1336,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -1352,7 +1352,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@ -13,7 +13,7 @@
define i1 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
@ -22,7 +22,7 @@ define i1 @test_v2i64(<2 x i64> %a0) {
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: testq %rax, %rax
@ -37,7 +37,7 @@ define i1 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE: # %bb.0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
@ -48,7 +48,7 @@ define i1 @test_v4i64(<4 x i64> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: testq %rax, %rax
@ -60,7 +60,7 @@ define i1 @test_v4i64(<4 x i64> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: testq %rax, %rax
@ -72,7 +72,7 @@ define i1 @test_v4i64(<4 x i64> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: testq %rax, %rax
@ -90,7 +90,7 @@ define i1 @test_v8i64(<8 x i64> %a0) {
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
@ -102,7 +102,7 @@ define i1 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: testq %rax, %rax
@ -115,7 +115,7 @@ define i1 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: testq %rax, %rax
@ -129,7 +129,7 @@ define i1 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: testq %rax, %rax
@ -151,7 +151,7 @@ define i1 @test_v16i64(<16 x i64> %a0) {
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
@ -165,7 +165,7 @@ define i1 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: testq %rax, %rax
@ -180,7 +180,7 @@ define i1 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: testq %rax, %rax
@ -195,7 +195,7 @@ define i1 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: testq %rax, %rax
@ -237,7 +237,7 @@ define i1 @test_v2i32(<2 x i32> %a0) {
define i1 @test_v4i32(<4 x i32> %a0) {
; SSE-LABEL: test_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -248,7 +248,7 @@ define i1 @test_v4i32(<4 x i32> %a0) {
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -265,7 +265,7 @@ define i1 @test_v8i32(<8 x i32> %a0) {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -278,7 +278,7 @@ define i1 @test_v8i32(<8 x i32> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -292,7 +292,7 @@ define i1 @test_v8i32(<8 x i32> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -306,7 +306,7 @@ define i1 @test_v8i32(<8 x i32> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -326,7 +326,7 @@ define i1 @test_v16i32(<16 x i32> %a0) {
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -340,7 +340,7 @@ define i1 @test_v16i32(<16 x i32> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -355,7 +355,7 @@ define i1 @test_v16i32(<16 x i32> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -371,7 +371,7 @@ define i1 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -395,7 +395,7 @@ define i1 @test_v32i32(<32 x i32> %a0) {
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -411,7 +411,7 @@ define i1 @test_v32i32(<32 x i32> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -428,7 +428,7 @@ define i1 @test_v32i32(<32 x i32> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -445,7 +445,7 @@ define i1 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -518,7 +518,7 @@ define i1 @test_v4i16(<4 x i16> %a0) {
define i1 @test_v8i16(<8 x i16> %a0) {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -532,7 +532,7 @@ define i1 @test_v8i16(<8 x i16> %a0) {
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -551,7 +551,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
; SSE-LABEL: test_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -567,7 +567,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -583,7 +583,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -599,7 +599,7 @@ define i1 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -621,7 +621,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -638,7 +638,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -655,7 +655,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -673,7 +673,7 @@ define i1 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -699,7 +699,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -718,7 +718,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -737,7 +737,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -756,7 +756,7 @@ define i1 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -865,7 +865,7 @@ define i1 @test_v8i8(<8 x i8> %a0) {
define i1 @test_v16i8(<16 x i8> %a0) {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -882,7 +882,7 @@ define i1 @test_v16i8(<16 x i8> %a0) {
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -903,7 +903,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
; SSE-LABEL: test_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
@ -922,7 +922,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -940,7 +940,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -958,7 +958,7 @@ define i1 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -982,7 +982,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -1002,7 +1002,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -1021,7 +1021,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -1041,7 +1041,7 @@ define i1 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -1069,7 +1069,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
@ -1091,7 +1091,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -1112,7 +1112,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -1133,7 +1133,7 @@ define i1 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0