diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 79a3fe00e8c..f60f019e775 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15926,7 +15926,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
     // Build a mask by testing the condition against zero.
     MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
     SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
-                                getZeroVector(CondVT, Subtarget, DAG, dl),
+                                DAG.getConstant(0, dl, CondVT),
                                 ISD::SETNE);
     // Now return a new VSELECT using the mask.
     return DAG.getSelect(dl, VT, Mask, LHS, RHS);
@@ -17953,10 +17953,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
   }
   // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
   if (Subtarget.hasDQI())
-    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
-                        In, ISD::SETGT);
-  return DAG.getSetCC(DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL),
-                      ISD::SETNE);
+    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
+  return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
 }
 
 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -19131,7 +19129,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
 
   SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
   return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
-                     getZeroVector(VT, Subtarget, DAG, dl));
+                     DAG.getConstant(0, dl, VT));
 }
 
 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
@@ -23101,7 +23099,7 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
   // we just take the hi result (by masking the lo result to zero before the
   // add).
   SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
-  SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
+  SDValue Zero = DAG.getConstant(0, DL, CurrVT);
   SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
   SDValue Lo = Op0;
 
@@ -23498,7 +23496,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
   bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
   bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);
 
-  SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
 
   // Only multiply lo/hi halves that aren't known to be zero.
   SDValue AloBlo = Zero;
@@ -23846,8 +23844,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
     if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
       assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
              "Unsupported PCMPGT op");
-      return DAG.getNode(X86ISD::PCMPGT, dl, VT,
-                         getZeroVector(VT, Subtarget, DAG, dl), R);
+      return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
     }
 
     if (ShiftAmt >= 32) {
@@ -23903,7 +23900,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
 
     // ashr(R, 7) === cmp_slt(R, 0)
     if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
-      SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      SDValue Zeros = DAG.getConstant(0, dl, VT);
       if (VT.is512BitVector()) {
         assert(VT == MVT::v64i8 && "Unexpected element type!");
         SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
@@ -24144,7 +24141,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
 
   // AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
   if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
-    SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+    SDValue Z = DAG.getConstant(0, dl, VT);
     SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
     SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
     Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
@@ -24338,7 +24335,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     // just zero-extending, but for SSE just duplicating the top 16-bits is
     // cheaper and has the same effect for out of range values.
     if (Subtarget.hasAVX()) {
-      SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+      SDValue Z = DAG.getConstant(0, dl, VT);
       Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
       Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
       Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
@@ -24473,7 +24470,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       // On pre-SSE41 targets we test for the sign bit by comparing to
       // zero - a negative value will set all bits of the lanes to true
       // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
-      SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+      SDValue Z = DAG.getConstant(0, dl, SelVT);
       SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
       return DAG.getSelect(dl, SelVT, C, V0, V1);
     };
@@ -24555,7 +24552,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
 
   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
-    SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+    SDValue Z = DAG.getConstant(0, dl, VT);
     SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
     SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
     SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
@@ -24757,7 +24754,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
      // On pre-SSE41 targets we test for the sign bit by comparing to
      // zero - a negative value will set all bits of the lanes to true
      // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
-      SDValue Z = getZeroVector(SelVT, Subtarget, DAG, DL);
+      SDValue Z = DAG.getConstant(0, DL, SelVT);
       SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
       return DAG.getSelect(DL, SelVT, C, V0, V1);
     };
@@ -33015,7 +33012,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
   // ready for the PHMINPOS.
   if (ExtractVT == MVT::i8) {
     SDValue Upper = DAG.getVectorShuffle(
-        SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+        SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
        {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
     MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
   }
@@ -35325,7 +35322,7 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
 
   // Shift zero -> zero.
   if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
-    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);
 
   APInt KnownUndef, KnownZero;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -35357,7 +35354,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
   APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
   if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
     if (LogicalShift)
-      return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+      return DAG.getConstant(0, SDLoc(N), VT);
     else
       ShiftVal = NumBitsPerElt - 1;
   }
@@ -35368,7 +35365,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
 
   // Shift zero -> zero.
   if (ISD::isBuildVectorAllZeros(N0.getNode()))
-    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);
 
   // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
   // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
@@ -38538,7 +38535,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
 
   // ANDNP(x, 0) -> 0
   if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
-    return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
+    return DAG.getConstant(0, SDLoc(N), VT);
 
   // Turn ANDNP back to AND if input is inverted.
   if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
@@ -40541,9 +40538,9 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
 
   if (N->getOperand(0) == N->getOperand(1)) {
     if (N->getOpcode() == X86ISD::PCMPEQ)
-      return getOnesVector(VT, DAG, DL);
+      return DAG.getConstant(-1, DL, VT);
     if (N->getOpcode() == X86ISD::PCMPGT)
-      return getZeroVector(VT, Subtarget, DAG, DL);
+      return DAG.getConstant(0, DL, VT);
   }
 
   return SDValue();
diff --git a/test/CodeGen/X86/combine-sdiv.ll b/test/CodeGen/X86/combine-sdiv.ll
index 46851659138..ff750340f84 100644
--- a/test/CodeGen/X86/combine-sdiv.ll
+++ b/test/CodeGen/X86/combine-sdiv.ll
@@ -3054,38 +3054,33 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) {
 define <16 x i8> @pr38658(<16 x i8> %x) {
 ; SSE2-LABEL: pr38658:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT:    psraw $8, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT:    psraw $8, %xmm2
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    psrlw $8, %xmm2
-; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    psraw $8, %xmm3
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pmullw %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    packuswb %xmm2, %xmm0
-; SSE2-NEXT:    paddb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE2-NEXT:    psraw $8, %xmm1
-; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $8, %xmm1
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    psraw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    packuswb %xmm2, %xmm1
+; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
 ; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    psllw $8, %xmm2
 ; SSE2-NEXT:    psrlw $8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    psrlw $7, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    paddb %xmm2, %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $7, %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: pr38658: