[X86] Replace more calls to getZeroVector with regular getConstant.
getZeroVector produces a specifically canonicalized zero vector, but we can just let DAG legalization take care of it.

The test changes are because MULH lowering happens later than it should and this change gave us the opportunity to constant fold away a multiply during a DAG combine before the build_vector got legalized with a bitcast.

llvm-svn: 347290
commit 1f41c9410a
parent 66f26fb6eb
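For context, a minimal before/after sketch of the pattern this commit rewrites (illustrative only, not the committed code; VT, dl, Subtarget, and DAG stand for the value type, debug location, subtarget, and SelectionDAG already in scope at each call site):

    // Before: getZeroVector immediately materializes X86's canonical
    // zero vector (e.g. an all-zeros node of a canonical type such as
    // v4i32, bitcast to VT).
    SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);

    // After: build a plain all-zeros constant and let DAG legalization
    // canonicalize it later; keeping an ordinary BUILD_VECTOR around
    // longer gives earlier DAG combines a chance to constant fold it.
    SDValue Zero = DAG.getConstant(0, dl, VT);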
lib/Target/X86/X86ISelLowering.cpp

@@ -15926,7 +15926,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   // Build a mask by testing the condition against zero.
   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
   SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
-                              getZeroVector(CondVT, Subtarget, DAG, dl),
+                              DAG.getConstant(0, dl, CondVT),
                               ISD::SETNE);
   // Now return a new VSELECT using the mask.
   return DAG.getSelect(dl, VT, Mask, LHS, RHS);
@@ -17953,10 +17953,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
   }
   // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
   if (Subtarget.hasDQI())
-    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
-                        In, ISD::SETGT);
-  return DAG.getSetCC(DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL),
-                      ISD::SETNE);
+    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
+  return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
 }

 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -19131,7 +19129,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,

   SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
   return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
-                     getZeroVector(VT, Subtarget, DAG, dl));
+                     DAG.getConstant(0, dl, VT));
 }

 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
@@ -23101,7 +23099,7 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
   // we just take the hi result (by masking the lo result to zero before the
   // add).
   SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
-  SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
+  SDValue Zero = DAG.getConstant(0, DL, CurrVT);

   SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
   SDValue Lo = Op0;
@@ -23498,7 +23496,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
   bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
   bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);

-  SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+  SDValue Zero = DAG.getConstant(0, dl, VT);

   // Only multiply lo/hi halves that aren't known to be zero.
   SDValue AloBlo = Zero;
@@ -23846,8 +23844,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
     if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
       assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
              "Unsupported PCMPGT op");
-      return DAG.getNode(X86ISD::PCMPGT, dl, VT,
-                         getZeroVector(VT, Subtarget, DAG, dl), R);
+      return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
     }

     if (ShiftAmt >= 32) {
@@ -23903,7 +23900,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,

     // ashr(R, 7) === cmp_slt(R, 0)
     if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
-      SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      SDValue Zeros = DAG.getConstant(0, dl, VT);
       if (VT.is512BitVector()) {
         assert(VT == MVT::v64i8 && "Unexpected element type!");
         SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
@@ -24144,7 +24141,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,

   // AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
   if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
-    SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+    SDValue Z = DAG.getConstant(0, dl, VT);
     SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
     SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
     Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
@@ -24338,7 +24335,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     // just zero-extending, but for SSE just duplicating the top 16-bits is
     // cheaper and has the same effect for out of range values.
     if (Subtarget.hasAVX()) {
-      SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+      SDValue Z = DAG.getConstant(0, dl, VT);
       Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
       Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
       Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
@@ -24473,7 +24470,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
      // On pre-SSE41 targets we test for the sign bit by comparing to
      // zero - a negative value will set all bits of the lanes to true
      // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
-     SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+     SDValue Z = DAG.getConstant(0, dl, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
      return DAG.getSelect(dl, SelVT, C, V0, V1);
    };
@@ -24555,7 +24552,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,

   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
-    SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+    SDValue Z = DAG.getConstant(0, dl, VT);
     SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
     SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
     SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
@@ -24757,7 +24754,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
      // On pre-SSE41 targets we test for the sign bit by comparing to
      // zero - a negative value will set all bits of the lanes to true
      // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
-     SDValue Z = getZeroVector(SelVT, Subtarget, DAG, DL);
+     SDValue Z = DAG.getConstant(0, DL, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
      return DAG.getSelect(DL, SelVT, C, V0, V1);
    };
@@ -33015,7 +33012,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
   // ready for the PHMINPOS.
   if (ExtractVT == MVT::i8) {
     SDValue Upper = DAG.getVectorShuffle(
-        SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+        SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
         {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
     MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
   }
@@ -35325,7 +35322,7 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,

   // Shift zero -> zero.
   if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
-    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);

   APInt KnownUndef, KnownZero;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -35357,7 +35354,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
   APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
   if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
     if (LogicalShift)
-      return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+      return DAG.getConstant(0, SDLoc(N), VT);
     else
       ShiftVal = NumBitsPerElt - 1;
   }
@@ -35368,7 +35365,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,

   // Shift zero -> zero.
   if (ISD::isBuildVectorAllZeros(N0.getNode()))
-    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);

   // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
   // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
@@ -38538,7 +38535,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,

   // ANDNP(x, 0) -> 0
   if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
-    return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);

   // Turn ANDNP back to AND if input is inverted.
   if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
@@ -40541,9 +40538,9 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,

   if (N->getOperand(0) == N->getOperand(1)) {
     if (N->getOpcode() == X86ISD::PCMPEQ)
-      return getOnesVector(VT, DAG, DL);
+      return DAG.getConstant(-1, DL, VT);
     if (N->getOpcode() == X86ISD::PCMPGT)
-      return getZeroVector(VT, Subtarget, DAG, DL);
+      return DAG.getConstant(0, DL, VT);
   }

   return SDValue();
test/CodeGen/X86/combine-sdiv.ll

@@ -3054,38 +3054,33 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) {
 define <16 x i8> @pr38658(<16 x i8> %x) {
 ; SSE2-LABEL: pr38658:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT:    psraw $8, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT:    psraw $8, %xmm2
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    psrlw $8, %xmm2
-; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    psraw $8, %xmm3
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pmullw %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    packuswb %xmm2, %xmm0
-; SSE2-NEXT:    paddb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE2-NEXT:    psraw $8, %xmm1
-; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $8, %xmm1
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
 ; SSE2-NEXT:    psraw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    packuswb %xmm2, %xmm1
+; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    psllw $8, %xmm2
 ; SSE2-NEXT:    psrlw $8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    psrlw $7, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    paddb %xmm2, %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $7, %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: pr38658: