1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[X86] Fold bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y)) for int-int bitcasts

Extend the existing combine that handles bitcasts around fp-logic ops so that it also folds integer logic ops across bitcasts when the outer bitcast returns to the same legal integer vector type that the inner bitcast came from.

This helps improve AVX512 predicate handling for D/Q logic ops and also allows DAGCombine's scalarizeExtractedBinop to remove some annoying gpr->simd->gpr transfers.

The concat_vectors regression in pr40891.ll will be addressed in a follow-up commit.

Differential Revision: https://reviews.llvm.org/D96206
This commit is contained in:
Simon Pilgrim 2021-02-21 14:40:54 +00:00
parent f5a3ff6669
commit 1640de7ef3
20 changed files with 135 additions and 170 deletions

View File

@ -39599,6 +39599,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
@ -39656,8 +39657,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// If we're bitcasting from iX to vXi1, see if the integer originally
// began as a vXi1 and whether we can remove the bitcast entirely.
if (VT.isVector() && VT.getScalarType() == MVT::i1 &&
SrcVT.isScalarInteger() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) {
if (SDValue V =
combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget))
return V;
@ -39845,8 +39845,11 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
}
// Check if we have a bitcast from another integer type as well.
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64)))
(Subtarget.hasSSE2() && VT == MVT::f64) ||
(Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&
TLI.isTypeLegal(VT))))
return SDValue();
SDValue LogicOp0 = N0.getOperand(0);
@ -39855,17 +39858,21 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).hasOneUse() &&
LogicOp0.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();
return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
}
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).hasOneUse() &&
LogicOp1.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();
return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();

View File

@ -911,7 +911,7 @@ define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
@ -933,7 +933,7 @@ define <8 x i64> @ternlog_maskz_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
@ -977,7 +977,7 @@ define <16 x i32> @ternlog_masky_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpord %zmm1, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq
@ -1023,7 +1023,7 @@ define <8 x i64> @ternlog_masky_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq

View File

@ -341,7 +341,7 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
; X86-NEXT: vpsrad $29, %xmm0, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)

View File

@ -1671,7 +1671,7 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq

View File

@ -688,7 +688,7 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq

View File

@ -688,7 +688,7 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind !prof !14 {
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq

View File

@ -1650,7 +1650,7 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq

View File

@ -217,7 +217,7 @@ define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline s
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
; X86-AVX512F-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
; X86-AVX512F-NEXT: vpandd {{\.LCPI.*}}, %zmm0, %zmm0
; X86-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
%ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3

View File

@ -8,9 +8,11 @@ define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,2],ymm0[0,2],ymm2[4,6],ymm0[4,6]
; CHECK-NEXT: retl
%a = shufflevector <4 x i64> %y, <4 x i64> <i64 12345, i64 67890, i64 13579, i64 24680>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b = and <8 x i64> %x, %a

View File

@ -678,38 +678,16 @@ define <4 x i32> @PR19721(<4 x i32> %i) {
; X86-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: PR19721:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-AVX-NEXT: retl
; AVX-LABEL: PR19721:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}}
;
; X64-SSE-LABEL: PR19721:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-SSE-NEXT: andq %rax, %rcx
; X64-SSE-NEXT: movq %rcx, %xmm1
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: PR19721:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-AVX1-NEXT: andq %rax, %rcx
; X64-AVX1-NEXT: vmovq %rcx, %xmm1
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: PR19721:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-AVX512-NEXT: andq %rax, %rcx
; X64-AVX512-NEXT: vmovq %rcx, %xmm1
; X64-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT: retq
%bc = bitcast <4 x i32> %i to i128
%insert = and i128 %bc, -4294967296
%bc2 = bitcast i128 %insert to <4 x i32>

View File

@ -813,10 +813,9 @@ define <2 x i32> @saddo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
; SSE-NEXT: pxor %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pcmpgtd %xmm2, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE-NEXT: pand %xmm5, %xmm2
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE-NEXT: por %xmm2, %xmm3
; SSE-NEXT: pxor %xmm2, %xmm2

View File

@ -819,19 +819,17 @@ define <2 x i32> @ssubo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pcmpgtd %xmm0, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE-NEXT: por %xmm0, %xmm3
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm0

View File

@ -249,9 +249,9 @@ define void @test_udiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
define void @test_urem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_urem_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: andps {{.*}}(%rip), %xmm0
; X64-NEXT: movlps %xmm0, (%rsi)
; X64-NEXT: movabsq $30064771079, %rax # imm = 0x700000007
; X64-NEXT: andq (%rdi), %rax
; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_urem_pow2_v2i32:

View File

@ -477,7 +477,7 @@ define i1 @test_v2i16(<2 x i16> %a0) {
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: testw %ax, %ax
; AVX-NEXT: sete %al
@ -505,7 +505,7 @@ define i1 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: testw %ax, %ax
; AVX-NEXT: setne %al
@ -537,7 +537,7 @@ define i1 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: testw %ax, %ax
; AVX-NEXT: sete %al

View File

@ -407,7 +407,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -432,7 +432,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -461,7 +461,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq

View File

@ -407,7 +407,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -432,7 +432,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -461,7 +461,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq

View File

@ -407,7 +407,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -432,7 +432,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -461,7 +461,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq

View File

@ -37,10 +37,9 @@ define <2 x i32> @trunc_packus_v2i64_v2i32(<2 x i64> %a0) {
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm1
@ -68,10 +67,9 @@ define <2 x i32> @trunc_packus_v2i64_v2i32(<2 x i64> %a0) {
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: pand %xmm3, %xmm1
@ -186,10 +184,9 @@ define void @trunc_packus_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm1
@ -218,10 +215,9 @@ define void @trunc_packus_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: pand %xmm3, %xmm1
@ -354,10 +350,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm1
@ -365,10 +360,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm0
@ -410,10 +404,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: pand %xmm4, %xmm1
@ -421,10 +414,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm2
; SSSE3-NEXT: pand %xmm4, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pand %xmm3, %xmm0
@ -584,9 +576,9 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSE2-NEXT: movdqa 32(%rdi), %xmm6
; SSE2-NEXT: movdqa 48(%rdi), %xmm9
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm11, %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483647,2147483647]
; SSE2-NEXT: movdqa %xmm10, %xmm5
; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
@ -600,7 +592,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSE2-NEXT: pandn %xmm8, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm7, %xmm1
; SSE2-NEXT: pxor %xmm11, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm10, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
@ -613,7 +605,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSE2-NEXT: pandn %xmm8, %xmm3
; SSE2-NEXT: por %xmm7, %xmm3
; SSE2-NEXT: movdqa %xmm6, %xmm1
; SSE2-NEXT: pxor %xmm11, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm10, %xmm4
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
@ -626,7 +618,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSE2-NEXT: pandn %xmm8, %xmm7
; SSE2-NEXT: por %xmm6, %xmm7
; SSE2-NEXT: movdqa %xmm9, %xmm1
; SSE2-NEXT: pxor %xmm11, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm10, %xmm4
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
@ -639,51 +631,47 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSE2-NEXT: pandn %xmm8, %xmm4
; SSE2-NEXT: por %xmm9, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm11, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pcmpgtd %xmm11, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm11, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm1
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm7, %xmm1
; SSE2-NEXT: pxor %xmm11, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm11, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm11, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: por %xmm6, %xmm1
; SSE2-NEXT: pand %xmm7, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2]
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm11, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm11, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm11, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pcmpeqd %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pand %xmm3, %xmm4
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm11, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pcmpgtd %xmm11, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm11, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pand %xmm5, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm0, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_packus_v8i64_v8i32:
@ -693,9 +681,9 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSSE3-NEXT: movdqa 32(%rdi), %xmm6
; SSSE3-NEXT: movdqa 48(%rdi), %xmm9
; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648]
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: pxor %xmm11, %xmm2
; SSSE3-NEXT: pxor %xmm0, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483647,2147483647]
; SSSE3-NEXT: movdqa %xmm10, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
@ -709,7 +697,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSSE3-NEXT: pandn %xmm8, %xmm2
; SSSE3-NEXT: por %xmm3, %xmm2
; SSSE3-NEXT: movdqa %xmm7, %xmm1
; SSSE3-NEXT: pxor %xmm11, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm10, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
@ -722,7 +710,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSSE3-NEXT: pandn %xmm8, %xmm3
; SSSE3-NEXT: por %xmm7, %xmm3
; SSSE3-NEXT: movdqa %xmm6, %xmm1
; SSSE3-NEXT: pxor %xmm11, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm10, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
@ -735,7 +723,7 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSSE3-NEXT: pandn %xmm8, %xmm7
; SSSE3-NEXT: por %xmm6, %xmm7
; SSSE3-NEXT: movdqa %xmm9, %xmm1
; SSSE3-NEXT: pxor %xmm11, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm10, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
@ -748,51 +736,47 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; SSSE3-NEXT: pandn %xmm8, %xmm4
; SSSE3-NEXT: por %xmm9, %xmm4
; SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm11, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm11, %xmm5
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm6, %xmm1
; SSSE3-NEXT: pand %xmm5, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSSE3-NEXT: por %xmm1, %xmm5
; SSSE3-NEXT: pand %xmm4, %xmm5
; SSSE3-NEXT: movdqa %xmm7, %xmm1
; SSSE3-NEXT: pxor %xmm11, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm6, %xmm0
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm6
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: por %xmm6, %xmm1
; SSSE3-NEXT: pand %xmm7, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2]
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: pxor %xmm11, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm11, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm0
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pxor %xmm0, %xmm4
; SSSE3-NEXT: movdqa %xmm4, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm4
; SSSE3-NEXT: pand %xmm3, %xmm4
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm11, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm11, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm11, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm6
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm6, %xmm0
; SSSE3-NEXT: pand %xmm5, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSSE3-NEXT: por %xmm4, %xmm5
; SSSE3-NEXT: pand %xmm3, %xmm5
; SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSSE3-NEXT: pand %xmm4, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm3, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_packus_v8i64_v8i32:

View File

@ -15,9 +15,9 @@ define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
;
; X64-LABEL: convert:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq %rsi, %xmm0
; X64-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movabsq $140733193388287, %rax ## imm = 0x7FFF000000FF
; X64-NEXT: xorq %rsi, %rax
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
entry:
%conv = bitcast i64 %src to <2 x i32>

View File

@ -394,14 +394,11 @@ define <4 x i32> @test10(<4 x i32> %a) nounwind {
define i32 @PR17487(i1 %tobool) {
; X86-LABEL: PR17487:
; X86: # %bb.0:
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT: pandn {{\.LCPI.*}}, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm0, %ecx
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: notb %cl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $1, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb $1, %cl
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LIN-LABEL: PR17487: