mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] SimplifyMultipleUseDemandedBits - Add target shuffle support
llvm-svn: 367782
This commit is contained in:
parent
303bb3ab3e
commit
607c9c137e
@ -34706,7 +34706,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
|
||||
SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
|
||||
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
|
||||
SelectionDAG &DAG, unsigned Depth) const {
|
||||
int NumElts = DemandedElts.getBitWidth();
|
||||
unsigned Opc = Op.getOpcode();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
switch (Opc) {
|
||||
case X86ISD::PINSRB:
|
||||
case X86ISD::PINSRW: {
|
||||
@ -34721,6 +34724,49 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
|
||||
}
|
||||
}
|
||||
|
||||
SmallVector<int, 16> ShuffleMask;
|
||||
SmallVector<SDValue, 2> ShuffleOps;
|
||||
if (VT.isSimple() && VT.isVector() &&
|
||||
resolveTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) {
|
||||
// If all the demanded elts are from one operand and are inline,
|
||||
// then we can use the operand directly.
|
||||
int NumOps = ShuffleOps.size();
|
||||
if (ShuffleMask.size() == NumElts &&
|
||||
llvm::all_of(ShuffleOps, [VT](SDValue V) {
|
||||
return VT.getSizeInBits() == V.getValueSizeInBits();
|
||||
})) {
|
||||
|
||||
// Bitmask that indicates which ops have only been accessed 'inline'.
|
||||
APInt IdentityOp = APInt::getAllOnesValue(NumOps);
|
||||
bool AllUndef = true;
|
||||
|
||||
for (int i = 0; i != NumElts; ++i) {
|
||||
int M = ShuffleMask[i];
|
||||
if (SM_SentinelUndef == M || !DemandedElts[i])
|
||||
continue;
|
||||
AllUndef = false;
|
||||
int Op = M / NumElts;
|
||||
int Index = M % NumElts;
|
||||
if (M < 0 || Index != i) {
|
||||
IdentityOp.clearAllBits();
|
||||
break;
|
||||
}
|
||||
IdentityOp &= APInt::getOneBitSet(NumOps, Op);
|
||||
if (IdentityOp == 0)
|
||||
break;
|
||||
}
|
||||
assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
|
||||
"Multiple identity shuffles detected");
|
||||
|
||||
if (AllUndef)
|
||||
return DAG.getUNDEF(VT);
|
||||
|
||||
for (int i = 0; i != NumOps; ++i)
|
||||
if (IdentityOp[i])
|
||||
return DAG.getBitcast(VT, ShuffleOps[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
|
||||
Op, DemandedBits, DemandedElts, DAG, Depth);
|
||||
}
|
||||
|
@ -562,143 +562,145 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, <6 x i32>* %p2) noun
|
||||
; SSE2-LABEL: smulo_v6i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %rdi, %rax
|
||||
; SSE2-NEXT: movd %r8d, %xmm9
|
||||
; SSE2-NEXT: movd %r8d, %xmm8
|
||||
; SSE2-NEXT: movd %ecx, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
|
||||
; SSE2-NEXT: movd %edx, %xmm6
|
||||
; SSE2-NEXT: movd %esi, %xmm5
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
|
||||
; SSE2-NEXT: movd %edx, %xmm3
|
||||
; SSE2-NEXT: movd %esi, %xmm6
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm10 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
||||
; SSE2-NEXT: movd %r9d, %xmm12
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm11 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm12 = xmm12[0],xmm11[0],xmm12[1],xmm11[1]
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm8 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm12 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSE2-NEXT: movd %r9d, %xmm13
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1]
|
||||
; SSE2-NEXT: movdqa %xmm13, %xmm11
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm11 = xmm11[0],xmm2[0],xmm11[1],xmm2[1]
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm9 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: pmuludq %xmm7, %xmm13
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm9[0],xmm7[1],xmm9[1]
|
||||
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE2-NEXT: pxor %xmm4, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm5, %xmm5
|
||||
; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
|
||||
; SSE2-NEXT: pand %xmm6, %xmm5
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand %xmm5, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtd %xmm5, %xmm1
|
||||
; SSE2-NEXT: pand %xmm3, %xmm1
|
||||
; SSE2-NEXT: paddd %xmm0, %xmm1
|
||||
; SSE2-NEXT: pmuludq %xmm5, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,3,2,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm10[0,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm9[0,0]
|
||||
; SSE2-NEXT: pmuludq %xmm7, %xmm6
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
|
||||
; SSE2-NEXT: psubd %xmm1, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,2,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm7, (%rcx)
|
||||
; SSE2-NEXT: psrad $31, %xmm7
|
||||
; SSE2-NEXT: pcmpeqd %xmm5, %xmm7
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm7
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand %xmm12, %xmm1
|
||||
; SSE2-NEXT: pcmpgtd %xmm12, %xmm4
|
||||
; SSE2-NEXT: pand %xmm2, %xmm4
|
||||
; SSE2-NEXT: paddd %xmm1, %xmm4
|
||||
; SSE2-NEXT: pmuludq %xmm12, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
|
||||
; SSE2-NEXT: pmuludq %xmm8, %xmm11
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSE2-NEXT: psubd %xmm4, %xmm1
|
||||
; SSE2-NEXT: pcmpgtd %xmm6, %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: paddd %xmm5, %xmm0
|
||||
; SSE2-NEXT: pmuludq %xmm6, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm12 = xmm12[0,0],xmm10[0,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm8[0,0]
|
||||
; SSE2-NEXT: pmuludq %xmm12, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
|
||||
; SSE2-NEXT: psubd %xmm0, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: movdqa %xmm0, (%rcx)
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: pcmpeqd %xmm5, %xmm0
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm7, %xmm3
|
||||
; SSE2-NEXT: pand %xmm11, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm11, %xmm4
|
||||
; SSE2-NEXT: pand %xmm7, %xmm4
|
||||
; SSE2-NEXT: paddd %xmm3, %xmm4
|
||||
; SSE2-NEXT: pmuludq %xmm9, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm13[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
|
||||
; SSE2-NEXT: psubd %xmm4, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; SSE2-NEXT: movq %xmm2, 16(%rcx)
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm2
|
||||
; SSE2-NEXT: movq %xmm2, 16(%rdi)
|
||||
; SSE2-NEXT: movdqa %xmm7, (%rdi)
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm13[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
; SSE2-NEXT: movq %xmm3, 16(%rcx)
|
||||
; SSE2-NEXT: psrad $31, %xmm3
|
||||
; SSE2-NEXT: pcmpeqd %xmm5, %xmm3
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm3
|
||||
; SSE2-NEXT: movq %xmm3, 16(%rdi)
|
||||
; SSE2-NEXT: movdqa %xmm0, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: smulo_v6i32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movq %rdi, %rax
|
||||
; SSSE3-NEXT: movd %r8d, %xmm9
|
||||
; SSSE3-NEXT: movd %r8d, %xmm8
|
||||
; SSSE3-NEXT: movd %ecx, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
|
||||
; SSSE3-NEXT: movd %edx, %xmm6
|
||||
; SSSE3-NEXT: movd %esi, %xmm5
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
|
||||
; SSSE3-NEXT: movd %edx, %xmm3
|
||||
; SSSE3-NEXT: movd %esi, %xmm6
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm10 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
||||
; SSSE3-NEXT: movd %r9d, %xmm12
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm11 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm12 = xmm12[0],xmm11[0],xmm12[1],xmm11[1]
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm8 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm12 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSSE3-NEXT: movd %r9d, %xmm13
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1]
|
||||
; SSSE3-NEXT: movdqa %xmm13, %xmm11
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm11 = xmm11[0],xmm2[0],xmm11[1],xmm2[1]
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm9 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: pmuludq %xmm7, %xmm13
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm9[0],xmm7[1],xmm9[1]
|
||||
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm4
|
||||
; SSSE3-NEXT: pxor %xmm5, %xmm5
|
||||
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
|
||||
; SSSE3-NEXT: pand %xmm6, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm0
|
||||
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
|
||||
; SSSE3-NEXT: pand %xmm5, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1
|
||||
; SSSE3-NEXT: pand %xmm3, %xmm1
|
||||
; SSSE3-NEXT: paddd %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pmuludq %xmm5, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,3,2,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm10[0,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm9[0,0]
|
||||
; SSSE3-NEXT: pmuludq %xmm7, %xmm6
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
|
||||
; SSSE3-NEXT: psubd %xmm1, %xmm5
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,2,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm7, (%rcx)
|
||||
; SSSE3-NEXT: psrad $31, %xmm7
|
||||
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm7
|
||||
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm7
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pand %xmm12, %xmm1
|
||||
; SSSE3-NEXT: pcmpgtd %xmm12, %xmm4
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm4
|
||||
; SSSE3-NEXT: paddd %xmm1, %xmm4
|
||||
; SSSE3-NEXT: pmuludq %xmm12, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
|
||||
; SSSE3-NEXT: pmuludq %xmm8, %xmm11
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSSE3-NEXT: psubd %xmm4, %xmm1
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
|
||||
; SSSE3-NEXT: pand %xmm1, %xmm0
|
||||
; SSSE3-NEXT: paddd %xmm5, %xmm0
|
||||
; SSSE3-NEXT: pmuludq %xmm6, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm12 = xmm12[0,0],xmm10[0,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm8[0,0]
|
||||
; SSSE3-NEXT: pmuludq %xmm12, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
|
||||
; SSSE3-NEXT: psubd %xmm0, %xmm5
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: movdqa %xmm0, (%rcx)
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0
|
||||
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm3, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm7, %xmm3
|
||||
; SSSE3-NEXT: pand %xmm11, %xmm3
|
||||
; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4
|
||||
; SSSE3-NEXT: pand %xmm7, %xmm4
|
||||
; SSSE3-NEXT: paddd %xmm3, %xmm4
|
||||
; SSSE3-NEXT: pmuludq %xmm9, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm13[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
|
||||
; SSSE3-NEXT: psubd %xmm4, %xmm5
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; SSSE3-NEXT: movq %xmm2, 16(%rcx)
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm2
|
||||
; SSSE3-NEXT: movq %xmm2, 16(%rdi)
|
||||
; SSSE3-NEXT: movdqa %xmm7, (%rdi)
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm13[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
; SSSE3-NEXT: movq %xmm3, 16(%rcx)
|
||||
; SSSE3-NEXT: psrad $31, %xmm3
|
||||
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm3
|
||||
; SSSE3-NEXT: movq %xmm3, 16(%rdi)
|
||||
; SSSE3-NEXT: movdqa %xmm0, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: smulo_v6i32:
|
||||
|
@ -1589,9 +1589,8 @@ define i8 @test_v4i8(<4 x i8> %a0) {
|
||||
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrb $0, %xmm1, %eax
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1603,8 +1602,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
|
||||
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
|
||||
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1616,8 +1614,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
|
||||
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
|
||||
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1650,15 +1647,13 @@ define i8 @test_v8i8(<8 x i8> %a0) {
|
||||
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[10],zero,xmm1[14],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrb $0, %xmm1, %eax
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1670,14 +1665,12 @@ define i8 @test_v8i8(<8 x i8> %a0) {
|
||||
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX-NEXT: vpmullw %xmm0, %xmm2, %xmm0
|
||||
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
|
||||
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1689,14 +1682,12 @@ define i8 @test_v8i8(<8 x i8> %a0) {
|
||||
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpmullw %xmm0, %xmm2, %xmm0
|
||||
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
|
||||
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
|
Loading…
x
Reference in New Issue
Block a user