mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Add PACK instructions to hasUndefRegUpdate so the BreakFalseDeps pass will reassign an undef second source to match the first source
We generate PACK instructions with an undef second source when we are truncating from a 128-bit vector to something narrower and we don't care about the upper bits of the vector register. The register allocation process will always assign untied undef uses to xmm0. This creates a false dependency on xmm0. By adding these instructions to hasUndefRegUpdate, we can get the BreakFalseDeps pass to reassign the source to match the other input. Normally this interface is used for instructions that might need an xor inserted to break the dependency. But the pass also has a heuristic that tries to use the same register as other sources. That should always be possible for these instructions so we'll never trigger the xor dependency break. Differential Revision: https://reviews.llvm.org/D79032
This commit is contained in:
parent
84604c6df9
commit
e744c76052
@ -4832,11 +4832,31 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
|
||||
|
||||
// Return true for any instruction that copies the high bits of the first source
|
||||
// operand into the unused high bits of the destination operand.
|
||||
// Also returns true for instructions that have two inputs where one may
|
||||
// be undef and we want it to use the same register as the other input.
|
||||
static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
|
||||
bool ForLoadFold = false) {
|
||||
// Set the OpNum parameter to the first source operand.
|
||||
OpNum = 1;
|
||||
switch (Opcode) {
|
||||
case X86::PACKSSWBrr:
|
||||
case X86::PACKUSWBrr:
|
||||
case X86::PACKSSDWrr:
|
||||
case X86::PACKUSDWrr:
|
||||
case X86::VPACKSSWBrr:
|
||||
case X86::VPACKUSWBrr:
|
||||
case X86::VPACKSSDWrr:
|
||||
case X86::VPACKUSDWrr:
|
||||
case X86::VPACKSSWBZ128rr:
|
||||
case X86::VPACKUSWBZ128rr:
|
||||
case X86::VPACKSSDWZ128rr:
|
||||
case X86::VPACKUSDWZ128rr:
|
||||
// These instructions are sometimes used with an undef second source to
|
||||
// truncate 128-bit vectors to 64-bit with undefined high bits. Return
|
||||
// true here so BreakFalseDeps will assign this source to the same register
|
||||
// as the first source to avoid a false dependency.
|
||||
OpNum = 2;
|
||||
return true;
|
||||
case X86::VCVTSI2SSrr:
|
||||
case X86::VCVTSI2SSrm:
|
||||
case X86::VCVTSI2SSrr_Int:
|
||||
|
@ -151,7 +151,7 @@ define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind {
|
||||
; SSE2-NEXT: pand %xmm6, %xmm5
|
||||
; SSE2-NEXT: pand %xmm6, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, (%rax)
|
||||
; SSE2-NEXT: movdqu %xmm0, (%rax)
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -12,7 +12,7 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
|
||||
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
|
||||
; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
|
||||
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -557,7 +557,7 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
|
||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
|
||||
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
|
@ -234,7 +234,7 @@ define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
|
||||
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -303,7 +303,7 @@ define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
|
||||
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-SSSE3-NEXT: por %xmm0, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -380,7 +380,7 @@ define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d,
|
||||
; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
|
||||
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -461,7 +461,7 @@ define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
|
||||
; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
|
||||
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -512,7 +512,7 @@ define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
|
||||
; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6
|
||||
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
@ -577,7 +577,7 @@ define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x fl
|
||||
; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8
|
||||
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
|
@ -27,7 +27,7 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
|
||||
; SSE-NEXT: packssdw %xmm9, %xmm8
|
||||
; SSE-NEXT: packssdw %xmm10, %xmm8
|
||||
; SSE-NEXT: pand %xmm0, %xmm8
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm8
|
||||
; SSE-NEXT: packsswb %xmm8, %xmm8
|
||||
; SSE-NEXT: pmovmskb %xmm8, %eax
|
||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -123,7 +123,7 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double>
|
||||
; SSE-NEXT: packssdw %xmm9, %xmm8
|
||||
; SSE-NEXT: packssdw %xmm10, %xmm8
|
||||
; SSE-NEXT: pand %xmm4, %xmm8
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm8
|
||||
; SSE-NEXT: packsswb %xmm8, %xmm8
|
||||
; SSE-NEXT: pmovmskb %xmm8, %eax
|
||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE-NEXT: retq
|
||||
|
@ -117,7 +117,7 @@ define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
|
||||
; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3
|
||||
; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2
|
||||
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
|
@ -354,7 +354,7 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b) {
|
||||
; SSE-NEXT: cmpltpd %xmm0, %xmm4
|
||||
; SSE-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE-NEXT: packssdw %xmm6, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -608,7 +608,7 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) {
|
||||
; SSE-NEXT: pcmpgtq %xmm0, %xmm4
|
||||
; SSE-NEXT: packssdw %xmm2, %xmm4
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE-NEXT: movb %al, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
|
@ -653,7 +653,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
|
||||
; SSE2-NEXT: movd %eax, %xmm3
|
||||
; SSE2-NEXT: pmullw %xmm0, %xmm3
|
||||
; SSE2-NEXT: psrlw $8, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm3
|
||||
; SSE2-NEXT: psrlw $7, %xmm3
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm2
|
||||
@ -669,7 +669,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE41-NEXT: psrlw $7, %xmm2
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
@ -684,7 +684,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
|
||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX-NEXT: vpmullw %xmm1, %xmm2, %xmm1
|
||||
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
|
@ -15,7 +15,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
|
||||
; SSE-LABEL: compressstore_v8f64_v8i1:
|
||||
; SSE: ## %bb.0:
|
||||
; SSE-NEXT: psllw $15, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: jne LBB0_1
|
||||
@ -84,7 +84,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
|
||||
; AVX1-LABEL: compressstore_v8f64_v8i1:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: je LBB0_2
|
||||
@ -152,7 +152,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
|
||||
; AVX2-LABEL: compressstore_v8f64_v8i1:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: je LBB0_2
|
||||
@ -845,7 +845,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
|
||||
; SSE2-LABEL: compressstore_v8f32_v8i1:
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: psllw $15, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB4_1
|
||||
@ -924,7 +924,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
|
||||
; SSE42-LABEL: compressstore_v8f32_v8i1:
|
||||
; SSE42: ## %bb.0:
|
||||
; SSE42-NEXT: psllw $15, %xmm2
|
||||
; SSE42-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE42-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE42-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE42-NEXT: testb $1, %al
|
||||
; SSE42-NEXT: jne LBB4_1
|
||||
@ -993,7 +993,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
|
||||
; AVX1-LABEL: compressstore_v8f32_v8i1:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: jne LBB4_1
|
||||
@ -1064,7 +1064,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
|
||||
; AVX2-LABEL: compressstore_v8f32_v8i1:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: jne LBB4_1
|
||||
@ -2729,7 +2729,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE2-LABEL: compressstore_v8i64_v8i1:
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: psllw $15, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB9_1
|
||||
@ -2802,7 +2802,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; SSE42-LABEL: compressstore_v8i64_v8i1:
|
||||
; SSE42: ## %bb.0:
|
||||
; SSE42-NEXT: psllw $15, %xmm4
|
||||
; SSE42-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE42-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE42-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE42-NEXT: testb $1, %al
|
||||
; SSE42-NEXT: jne LBB9_1
|
||||
@ -2871,7 +2871,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; AVX1-LABEL: compressstore_v8i64_v8i1:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: je LBB9_2
|
||||
@ -2939,7 +2939,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
||||
; AVX2-LABEL: compressstore_v8i64_v8i1:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: je LBB9_2
|
||||
@ -3182,7 +3182,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB11_1
|
||||
@ -3260,7 +3260,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
|
||||
; SSE42: ## %bb.0:
|
||||
; SSE42-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE42-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE42-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE42-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE42-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE42-NEXT: testb $1, %al
|
||||
; SSE42-NEXT: jne LBB11_1
|
||||
@ -3330,7 +3330,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
|
||||
; AVX1OR2: ## %bb.0:
|
||||
; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1OR2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX1OR2-NEXT: testb $1, %al
|
||||
; AVX1OR2-NEXT: jne LBB11_1
|
||||
|
@ -296,7 +296,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
|
||||
; SSE-LABEL: expandload_v8f64_v8i1:
|
||||
; SSE: ## %bb.0:
|
||||
; SSE-NEXT: psllw $15, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: jne LBB2_1
|
||||
@ -365,7 +365,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
|
||||
; AVX1-LABEL: expandload_v8f64_v8i1:
|
||||
; AVX1: ## %bb.0:
|
||||
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: jne LBB2_1
|
||||
@ -446,7 +446,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
|
||||
; AVX2-LABEL: expandload_v8f64_v8i1:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovmskb %xmm2, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: jne LBB2_1
|
||||
@ -2954,7 +2954,7 @@ define <8 x i16> @expandload_v8i16_v8i16(i16* %base, <8 x i16> %src0, <8 x i16>
|
||||
; SSE: ## %bb.0:
|
||||
; SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: jne LBB11_1
|
||||
@ -3024,7 +3024,7 @@ define <8 x i16> @expandload_v8i16_v8i16(i16* %base, <8 x i16> %src0, <8 x i16>
|
||||
; AVX1OR2: ## %bb.0:
|
||||
; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1OR2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX1OR2-NEXT: testb $1, %al
|
||||
; AVX1OR2-NEXT: jne LBB11_1
|
||||
|
@ -1099,7 +1099,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; SSE-NEXT: pcmpeqd %xmm5, %xmm6
|
||||
; SSE-NEXT: pcmpeqd %xmm3, %xmm5
|
||||
; SSE-NEXT: packssdw %xmm5, %xmm6
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm6
|
||||
; SSE-NEXT: packsswb %xmm6, %xmm6
|
||||
; SSE-NEXT: pmovmskb %xmm6, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: je .LBB4_19
|
||||
@ -1168,7 +1168,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
|
||||
; SSE-NEXT: pcmpeqd %xmm7, %xmm2
|
||||
; SSE-NEXT: pcmpeqd %xmm7, %xmm3
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: je .LBB4_37
|
||||
|
@ -393,7 +393,7 @@ define <8 x double> @load_v8f64_v8i16(<8 x i16> %trigger, <8 x double>* %addr, <
|
||||
; SSE: ## %bb.0:
|
||||
; SSE-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE-NEXT: pcmpeqw %xmm0, %xmm5
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm5
|
||||
; SSE-NEXT: packsswb %xmm5, %xmm5
|
||||
; SSE-NEXT: pmovmskb %xmm5, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: jne LBB5_1
|
||||
@ -541,7 +541,7 @@ define <8 x double> @load_v8f64_v8i64(<8 x i64> %trigger, <8 x double>* %addr, <
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB6_1
|
||||
@ -1645,7 +1645,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE2-NEXT: pcmpeqw %xmm0, %xmm5
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: packsswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: pmovmskb %xmm5, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB15_1
|
||||
@ -1716,7 +1716,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
|
||||
; SSE42: ## %bb.0:
|
||||
; SSE42-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE42-NEXT: pcmpeqw %xmm0, %xmm5
|
||||
; SSE42-NEXT: packsswb %xmm0, %xmm5
|
||||
; SSE42-NEXT: packsswb %xmm5, %xmm5
|
||||
; SSE42-NEXT: pmovmskb %xmm5, %eax
|
||||
; SSE42-NEXT: testb $1, %al
|
||||
; SSE42-NEXT: jne LBB15_1
|
||||
@ -1864,7 +1864,7 @@ define <8 x i64> @load_v8i64_v8i64(<8 x i64> %trigger, <8 x i64>* %addr, <8 x i6
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB16_1
|
||||
|
@ -353,7 +353,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, <8 x float>* %ptr, <8 x float> %y
|
||||
; SSE2-LABEL: store_v8f32_v8i32:
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB5_1
|
||||
@ -425,7 +425,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, <8 x float>* %ptr, <8 x float> %y
|
||||
; SSE4-LABEL: store_v8f32_v8i32:
|
||||
; SSE4: ## %bb.0:
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne LBB5_1
|
||||
@ -1423,7 +1423,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, <8 x i16>* %addr, <8 x i16> %
|
||||
; SSE2: ## %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: pcmpeqw %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne LBB13_1
|
||||
@ -1494,7 +1494,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, <8 x i16>* %addr, <8 x i16> %
|
||||
; SSE4: ## %bb.0:
|
||||
; SSE4-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE4-NEXT: pcmpeqw %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne LBB13_1
|
||||
|
@ -19,7 +19,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB0_1
|
||||
@ -94,7 +94,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB0_1
|
||||
@ -232,7 +232,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB1_1
|
||||
@ -315,7 +315,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB1_1
|
||||
@ -624,7 +624,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -702,7 +702,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB2_1
|
||||
@ -3331,7 +3331,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB11_1
|
||||
@ -3410,7 +3410,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB11_1
|
||||
@ -3714,7 +3714,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -3787,7 +3787,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB12_1
|
||||
@ -6269,7 +6269,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -6336,7 +6336,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE4-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE4-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB17_1
|
||||
@ -6402,7 +6402,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX-NEXT: testb $1, %al
|
||||
; AVX-NEXT: jne .LBB17_1
|
||||
|
@ -119,7 +119,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm7, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: je .LBB0_2
|
||||
@ -228,7 +228,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB0_1
|
||||
@ -506,7 +506,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB1_1
|
||||
@ -618,7 +618,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB1_1
|
||||
@ -1062,7 +1062,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -1161,14 +1161,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm10, %xmm1
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE4-NEXT: packssdw %xmm1, %xmm7
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm7
|
||||
; SSE4-NEXT: packsswb %xmm7, %xmm7
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB2_1
|
||||
@ -1842,7 +1842,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask
|
||||
; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm1
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE4-NEXT: packssdw %xmm0, %xmm1
|
||||
; SSE4-NEXT: packssdw %xmm1, %xmm1
|
||||
; SSE4-NEXT: pcmpeqd %xmm2, %xmm4
|
||||
; SSE4-NEXT: movmskps %xmm4, %eax
|
||||
; SSE4-NEXT: xorl $15, %eax
|
||||
@ -4238,7 +4238,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB11_1
|
||||
@ -4315,7 +4315,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB11_1
|
||||
@ -4619,7 +4619,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -4690,7 +4690,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB12_1
|
||||
@ -7234,7 +7234,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -7301,7 +7301,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE4-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE4-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB17_1
|
||||
@ -7367,7 +7367,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX-NEXT: testb $1, %al
|
||||
; AVX-NEXT: jne .LBB17_1
|
||||
|
@ -65,7 +65,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: je .LBB0_2
|
||||
@ -167,7 +167,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm9, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB0_1
|
||||
@ -390,7 +390,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB1_1
|
||||
@ -495,7 +495,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB1_1
|
||||
@ -870,14 +870,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm7
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm7
|
||||
; SSE2-NEXT: packuswb %xmm7, %xmm7
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm5
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE2-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm7, %ecx
|
||||
@ -969,14 +969,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm7, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm6, %xmm1
|
||||
; SSE4-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE4-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE4-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE4-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE4-NEXT: pmovmskb %xmm4, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB2_1
|
||||
@ -1579,7 +1579,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask
|
||||
; SSE4-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE4-NEXT: packusdw %xmm3, %xmm5
|
||||
; SSE4-NEXT: packusdw %xmm0, %xmm5
|
||||
; SSE4-NEXT: packusdw %xmm5, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm2, %xmm6
|
||||
; SSE4-NEXT: movmskps %xmm6, %eax
|
||||
; SSE4-NEXT: xorl $15, %eax
|
||||
@ -3958,7 +3958,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm6, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: jne .LBB11_1
|
||||
@ -4038,7 +4038,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB11_1
|
||||
@ -4352,14 +4352,14 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pandn %xmm9, %xmm6
|
||||
; SSE2-NEXT: por %xmm0, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm6
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm3
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm3
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm6, %ecx
|
||||
@ -4433,7 +4433,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
|
||||
; SSE4-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE4-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE4-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE4-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB12_1
|
||||
@ -7057,7 +7057,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
@ -7125,7 +7125,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
|
||||
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE4-NEXT: pxor %xmm2, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE4-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE4-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE4-NEXT: testb $1, %al
|
||||
; SSE4-NEXT: jne .LBB17_1
|
||||
@ -7192,7 +7192,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
|
||||
; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovmskb %xmm1, %eax
|
||||
; AVX-NEXT: testb $1, %al
|
||||
; AVX-NEXT: jne .LBB17_1
|
||||
|
@ -696,7 +696,7 @@ define void @interleave_24i8_out(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,3,2,1,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,6,5,4,7]
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,255,255,0,255,255,0,255,255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm6
|
||||
; SSE2-NEXT: pand %xmm5, %xmm6
|
||||
@ -712,7 +712,7 @@ define void @interleave_24i8_out(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,3,2,1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[1,2,3,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,5,6,7,4]
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: pand %xmm6, %xmm0
|
||||
; SSE2-NEXT: pandn %xmm1, %xmm6
|
||||
@ -833,7 +833,7 @@ define void @interleave_24i8_in(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,1,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,3,3,4,5,6,7]
|
||||
|
@ -348,7 +348,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
|
||||
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
|
||||
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
|
||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3
|
||||
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
|
||||
@ -363,7 +363,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
|
||||
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
|
||||
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpackssdw %xmm0, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpackssdw %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
|
||||
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
|
||||
|
@ -546,7 +546,7 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -1237,7 +1237,7 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -551,7 +551,7 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -635,7 +635,7 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -1050,11 +1050,11 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6
|
||||
; AVX2-NEXT: vpackssdw %xmm6, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpacksswb %xmm5, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpmovsxbd %xmm5, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm4, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm4, %xmm4, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -678,7 +678,7 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
|
||||
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %ymm2, (%rdi)
|
||||
|
@ -1100,7 +1100,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
|
||||
; SSE-LABEL: bool_reduction_v8i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpgtw %xmm0, %xmm1
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE-NEXT: cmpb $-1, %al
|
||||
; SSE-NEXT: sete %al
|
||||
@ -1227,7 +1227,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
|
||||
; SSE-NEXT: pminud %xmm0, %xmm2
|
||||
; SSE-NEXT: pcmpeqd %xmm0, %xmm2
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE-NEXT: cmpb $-1, %al
|
||||
; SSE-NEXT: sete %al
|
||||
|
@ -996,7 +996,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
|
||||
; SSE-LABEL: bool_reduction_v8i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpgtw %xmm0, %xmm1
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE-NEXT: testb %al, %al
|
||||
; SSE-NEXT: setne %al
|
||||
@ -1123,7 +1123,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
|
||||
; SSE-NEXT: pminud %xmm0, %xmm2
|
||||
; SSE-NEXT: pcmpeqd %xmm0, %xmm2
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE-NEXT: testb %al, %al
|
||||
; SSE-NEXT: setne %al
|
||||
|
@ -317,7 +317,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: psllw $7, %xmm4
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
|
||||
; SSE41-NEXT: psrlw $8, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
@ -816,7 +816,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: psllw $7, %xmm4
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
|
||||
; SSE41-NEXT: psrlw $8, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm4
|
||||
; SSE41-NEXT: psrlw $8, %xmm4
|
||||
|
@ -511,7 +511,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: psllw $15, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: cmpb $-1, %al
|
||||
; SSE2-NEXT: sete %al
|
||||
@ -1404,7 +1404,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: cmpb $-1, %al
|
||||
; SSE2-NEXT: sete %al
|
||||
|
@ -505,7 +505,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: psllw $15, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: testb %al, %al
|
||||
; SSE2-NEXT: setne %al
|
||||
@ -1393,7 +1393,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: testb %al, %al
|
||||
; SSE2-NEXT: setne %al
|
||||
|
@ -554,7 +554,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: psllw $15, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packsswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE2-NEXT: xorb $0, %al
|
||||
; SSE2-NEXT: setnp %al
|
||||
@ -1582,7 +1582,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE2-NEXT: xorb $0, %al
|
||||
; SSE2-NEXT: setnp %al
|
||||
|
@ -38,14 +38,14 @@ define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16
|
||||
; SSE41-LABEL: blend_packusdw_packuswb:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm0
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: blend_packusdw_packuswb:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpackuswb %xmm0, %xmm2, %xmm1
|
||||
; AVX-NEXT: vpackuswb %xmm2, %xmm2, %xmm1
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX-NEXT: retq
|
||||
%p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
|
||||
|
@ -1512,7 +1512,7 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) {
|
||||
; SSE41-NEXT: por %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1766,7 +1766,7 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
|
||||
; SSE41-NEXT: por %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2870,8 +2870,8 @@ define void @trunc_packus_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) {
|
||||
; SSE2-NEXT: pand %xmm3, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
@ -3313,8 +3313,8 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) {
|
||||
; SSE2-NEXT: pand %xmm8, %xmm2
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: movd %xmm2, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3838,7 +3838,7 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm5, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -4030,7 +4030,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm3
|
||||
; SSE2-NEXT: movq %xmm3, (%rsi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -4142,7 +4142,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l
|
||||
; SSSE3-NEXT: pand %xmm1, %xmm2
|
||||
; SSSE3-NEXT: packuswb %xmm4, %xmm2
|
||||
; SSSE3-NEXT: packuswb %xmm2, %xmm3
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSSE3-NEXT: packuswb %xmm3, %xmm3
|
||||
; SSSE3-NEXT: movq %xmm3, (%rsi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -4240,7 +4240,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm5, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: movq %xmm4, (%rsi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -5148,7 +5148,7 @@ define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width"
|
||||
; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -5230,8 +5230,8 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
|
||||
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -5256,8 +5256,8 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
|
||||
; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pmaxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -1508,7 +1508,7 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) {
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1643,7 +1643,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: por %xmm3, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm6, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm0, %xmm1
|
||||
; SSE2-NEXT: packssdw %xmm1, %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -1706,7 +1706,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
|
||||
; SSSE3-NEXT: pandn %xmm0, %xmm1
|
||||
; SSSE3-NEXT: por %xmm3, %xmm1
|
||||
; SSSE3-NEXT: packssdw %xmm6, %xmm1
|
||||
; SSSE3-NEXT: packssdw %xmm0, %xmm1
|
||||
; SSSE3-NEXT: packssdw %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -1756,7 +1756,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm0, %xmm1
|
||||
; SSE41-NEXT: packssdw %xmm1, %xmm1
|
||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2610,8 +2610,8 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) {
|
||||
; SSE2-NEXT: por %xmm3, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
@ -3590,7 +3590,7 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"=
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE41-NEXT: packssdw %xmm3, %xmm5
|
||||
; SSE41-NEXT: packssdw %xmm5, %xmm1
|
||||
; SSE41-NEXT: packsswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: packsswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3791,7 +3791,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg
|
||||
; SSE2-NEXT: por %xmm1, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm4, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm6
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm6
|
||||
; SSE2-NEXT: packsswb %xmm6, %xmm6
|
||||
; SSE2-NEXT: movq %xmm6, (%rsi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3912,7 +3912,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg
|
||||
; SSSE3-NEXT: por %xmm1, %xmm2
|
||||
; SSSE3-NEXT: packssdw %xmm4, %xmm2
|
||||
; SSSE3-NEXT: packssdw %xmm2, %xmm6
|
||||
; SSSE3-NEXT: packsswb %xmm0, %xmm6
|
||||
; SSSE3-NEXT: packsswb %xmm6, %xmm6
|
||||
; SSSE3-NEXT: movq %xmm6, (%rsi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -4010,7 +4010,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE41-NEXT: packssdw %xmm2, %xmm5
|
||||
; SSE41-NEXT: packssdw %xmm5, %xmm4
|
||||
; SSE41-NEXT: packsswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: packsswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: movq %xmm4, (%rsi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -5024,8 +5024,8 @@ define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: por %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -1046,7 +1046,7 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
|
||||
; SSE41-NEXT: por %xmm5, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm6, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1221,7 +1221,7 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) {
|
||||
; SSE41-NEXT: por %xmm5, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
|
||||
; SSE41-NEXT: packusdw %xmm6, %xmm3
|
||||
; SSE41-NEXT: packusdw %xmm0, %xmm3
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm3
|
||||
; SSE41-NEXT: movq %xmm3, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2127,8 +2127,8 @@ define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) {
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: movd %xmm2, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
@ -2431,8 +2431,8 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) {
|
||||
; SSE2-NEXT: pand %xmm8, %xmm0
|
||||
; SSE2-NEXT: pand %xmm8, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm3
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm3
|
||||
; SSE2-NEXT: movd %xmm3, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -2767,7 +2767,7 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64>* %p0) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm4, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2903,7 +2903,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: por %xmm9, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: movq %xmm5, (%rsi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -2970,7 +2970,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) {
|
||||
; SSSE3-NEXT: por %xmm9, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSSE3-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSSE3-NEXT: movq %xmm5, (%rsi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -3027,7 +3027,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm6
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm6
|
||||
; SSE41-NEXT: packuswb %xmm6, %xmm6
|
||||
; SSE41-NEXT: movq %xmm6, (%rsi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3698,8 +3698,8 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
|
||||
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: movd %xmm2, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3907,7 +3907,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||
; SSE2-NEXT: por %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: movq %xmm5, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -3929,7 +3929,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
||||
; SSSE3-NEXT: pandn %xmm2, %xmm5
|
||||
; SSSE3-NEXT: por %xmm0, %xmm5
|
||||
; SSSE3-NEXT: packuswb %xmm6, %xmm5
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSSE3-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSSE3-NEXT: movq %xmm5, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user