Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 12:41:49 +01:00)
[DAG] Add SimplifyDemandedVectorElts binop SimplifyMultipleUseDemandedBits handling
For the supported binops (basic arithmetic, logicals + shifts), if we fail to simplify the demanded vector elts, then call SimplifyMultipleUseDemandedBits and try to peek through ops to remove unnecessary dependencies. This helps with PR40502.

Differential Revision: https://reviews.llvm.org/D79003
commit 84cbff4065
parent 02cbd46496
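In outline, the change works like this: when only a subset of a binop's lanes is demanded, each operand is queried through SimplifyMultipleUseDemandedBits, demanding all bits of just those lanes, to see whether a simpler existing node can supply them even though the operand has other users; if either side simplifies, the binop is rebuilt on the cheaper inputs. The annotated sketch below merely restates the SimplifyDemandedVectorEltsBinOp lambda from the diff; the surrounding names (Op, Opcode, VT, DemandedElts, TLO, Depth) are the parameters and locals of SimplifyDemandedVectorElts, and the lambda name used here is invented for illustration.

// Illustrative sketch only: a restatement of the helper this patch adds,
// assuming the enclosing TargetLowering::SimplifyDemandedVectorElts context.
auto TrySimplifyBinOpOperands = [&](SDValue Op0, SDValue Op1) -> bool {
  // Demand every bit of each demanded element on both operands.
  APInt DemandedBits0 = APInt::getAllOnesValue(Op0.getScalarValueSizeInBits());
  APInt DemandedBits1 = APInt::getAllOnesValue(Op1.getScalarValueSizeInBits());
  // Ask whether a simpler existing node already provides those lanes, even
  // if the original operand has other users (hence "MultipleUse").
  SDValue NewOp0 = SimplifyMultipleUseDemandedBits(
      Op0, DemandedBits0, DemandedElts, TLO.DAG, Depth + 1);
  SDValue NewOp1 = SimplifyMultipleUseDemandedBits(
      Op1, DemandedBits1, DemandedElts, TLO.DAG, Depth + 1);
  if (!NewOp0 && !NewOp1)
    return false;
  // Rebuild the binop on the simpler inputs, dropping the dependency on the
  // multi-use chain for the lanes that are not needed.
  SDValue NewOp = TLO.DAG.getNode(Opcode, SDLoc(Op), VT,
                                  NewOp0 ? NewOp0 : Op0,
                                  NewOp1 ? NewOp1 : Op1);
  return TLO.CombineTo(Op, NewOp);
};

As the diff shows, the helper is only invoked when DemandedElts is not all-ones, since with every element demanded there is nothing to peek through.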
@@ -2225,6 +2225,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");
@@ -2256,7 +2257,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    unsigned NumBits0 = Op0.getScalarValueSizeInBits();
    unsigned NumBits1 = Op1.getScalarValueSizeInBits();
    APInt DemandedBits0 = APInt::getAllOnesValue(NumBits0);
    APInt DemandedBits1 = APInt::getAllOnesValue(NumBits1);
    SDValue NewOp0 = SimplifyMultipleUseDemandedBits(
        Op0, DemandedBits0, DemandedElts, TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedBits(
        Op1, DemandedBits1, DemandedElts, TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
@@ -2635,7 +2655,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::OR:
  case ISD::XOR:
@@ -2646,17 +2666,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
@@ -2664,27 +2693,39 @@ bool TargetLowering::SimplifyDemandedVectorElts(
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
                                   SrcZero, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
@@ -2694,6 +2735,12 @@ bool TargetLowering::SimplifyDemandedVectorElts(
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:

@@ -133,7 +133,7 @@ define <4 x float> @splat0_before_fmul_fmul_constant(<4 x float> %a) {
; CHECK-LABEL: splat0_before_fmul_fmul_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.4s, #3.00000000
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fmov v1.4s, #6.00000000
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT: ret

@@ -187,7 +187,7 @@ define i32 @PR43159(<4 x i32>* %a0) {
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: psubd %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
; SSE-NEXT: pxor %xmm2, %xmm2
@@ -213,7 +213,7 @@ define i32 @PR43159(<4 x i32>* %a0) {
; AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
@@ -238,7 +238,7 @@ define i32 @PR43159(<4 x i32>* %a0) {
; AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
@@ -263,7 +263,7 @@ define i32 @PR43159(<4 x i32>* %a0) {
; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512DQVL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX512DQVL-NEXT: vpmuludq %xmm2, %xmm0, %xmm0

@@ -1997,12 +1997,12 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm2[1]
; SSE2-NEXT: psrad $2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0,3]
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: psubd %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
@@ -2022,12 +2022,11 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrad $2, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm3, %xmm2
; SSE41-NEXT: psrad $3, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -2043,12 +2042,11 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
@@ -2057,10 +2055,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; AVX2ORLATER-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2ORLATER-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2ORLATER-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2ORLATER-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2ORLATER-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2ORLATER-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2ORLATER-NEXT: vpsubd %xmm1, %xmm2, %xmm2
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2ORLATER-NEXT: retq
;
; XOP-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
@@ -2069,10 +2067,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; XOP-NEXT: vpshld {{.*}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpshad {{.*}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm2
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; XOP-NEXT: retq
%1 = sdiv <4 x i32> %x, <i32 1, i32 -4, i32 8, i32 -16>
ret <4 x i32> %1

@@ -192,82 +192,67 @@ define void @PR42833() {
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: addl .Lb${{.*}}(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movaps {{.*#+}} xmm3 = <u,1,1,1>
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: paddd %xmm3, %xmm4
; SSE2-NEXT: pslld $23, %xmm3
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm3
; SSE2-NEXT: cvttps2dq %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pmuludq %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm3, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4
; SSE2-NEXT: psubd %xmm1, %xmm4
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: paddd %xmm0, %xmm5
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movaps %xmm5, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm4
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm3
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: psubd %xmm0, %xmm7
; SSE2-NEXT: psubd %xmm4, %xmm6
; SSE2-NEXT: psubd %xmm3, %xmm6
; SSE2-NEXT: psubd %xmm1, %xmm5
; SSE2-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm7, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: paddd %xmm4, %xmm4
; SSE2-NEXT: paddd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm3, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: retq
;
; SSE42-LABEL: PR42833:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
; SSE42-NEXT: movd %xmm0, %eax
; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = <u,1,1,1>
; SSE42-NEXT: pinsrd $0, %eax, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: paddd %xmm2, %xmm3
; SSE42-NEXT: pslld $23, %xmm2
; SSE42-NEXT: paddd {{.*}}(%rip), %xmm2
; SSE42-NEXT: cvttps2dq %xmm2, %xmm2
; SSE42-NEXT: pmulld %xmm0, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3,4,5,6,7]
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3
; SSE42-NEXT: psubd %xmm1, %xmm3
; SSE42-NEXT: paddd %xmm1, %xmm1
; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE42-NEXT: movd %xmm1, %eax
; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax
; SSE42-NEXT: movd %eax, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm2
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3
; SSE42-NEXT: psubd %xmm0, %xmm3
; SSE42-NEXT: paddd %xmm0, %xmm0
; SSE42-NEXT: movdqa %xmm1, %xmm4
; SSE42-NEXT: paddd %xmm1, %xmm4
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm2
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6
; SSE42-NEXT: pinsrd $0, %eax, %xmm0
; SSE42-NEXT: psubd %xmm0, %xmm6
; SSE42-NEXT: pinsrd $0, %eax, %xmm1
; SSE42-NEXT: psubd %xmm1, %xmm6
; SSE42-NEXT: psubd %xmm2, %xmm5
; SSE42-NEXT: psubd %xmm1, %xmm4
; SSE42-NEXT: psubd %xmm0, %xmm4
; SSE42-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: paddd %xmm2, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm1
; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: paddd %xmm0, %xmm0
; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: retq
;
@@ -276,17 +261,13 @@ define void @PR42833() {
; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: addl .Lb${{.*}}(%rip), %eax
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <u,1,1,1>
; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpslld $1, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2
; AVX1-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip)
@@ -316,10 +297,9 @@ define void @PR42833() {
; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
; AVX2-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0],mem[1,2,3,4,5,6,7]
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3,4,5,6,7]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7]
; AVX2-NEXT: vmovdqu %ymm2, .Lc$local+{{.*}}(%rip)
; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm2
; AVX2-NEXT: vmovdqu .Ld$local+{{.*}}(%rip), %ymm3
@@ -341,10 +321,9 @@ define void @PR42833() {
; AVX512-NEXT: vmovdqu64 .Lc$local+{{.*}}(%rip), %zmm1
; AVX512-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
; AVX512-NEXT: vmovd %eax, %xmm2
; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],mem[1,2,3,4,5,6,7]
; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm3
; AVX512-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm2
; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm2
; AVX512-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip)
; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
@@ -364,14 +343,13 @@ define void @PR42833() {
; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: addl .Lb${{.*}}(%rip), %eax
; XOP-NEXT: vmovdqa {{.*#+}} xmm1 = <u,1,1,1>
; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3
; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpslld $1, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2
; XOP-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2
; XOP-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip)

@@ -671,7 +671,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE-LABEL: splatvar_funnnel_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: psubq %xmm1, %xmm3
@@ -683,31 +682,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:

@@ -514,9 +514,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
@@ -537,7 +537,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3

@@ -711,7 +711,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE-LABEL: splatvar_funnnel_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: psubq %xmm1, %xmm3
@@ -723,31 +722,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:

@@ -560,9 +560,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
@@ -583,7 +583,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3

@@ -151,12 +151,11 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm0, %xmm0
; SSE-NEXT: mulpd %xmm2, %xmm2
; SSE-NEXT: addpd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm1
; SSE-NEXT: addpd %xmm2, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fmul_v2f64: