diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 418cda63d2b..9c00749bacf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5090,8 +5090,7 @@ static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT, } /// Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with -/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. +/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>. /// Then bitcast to their original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { @@ -5100,13 +5099,7 @@ static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget, APInt Ones = APInt::getAllOnesValue(32); unsigned NumElts = VT.getSizeInBits() / 32; - SDValue Vec; - if (!Subtarget.hasInt256() && NumElts == 8) { - Vec = DAG.getConstant(Ones, dl, MVT::v4i32); - Vec = concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); - } else { - Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); - } + SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); return DAG.getBitcast(VT, Vec); } @@ -30633,20 +30626,9 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { N01 = peekThroughBitcasts(N01); - // Either match a direct AllOnes for 128, 256, and 512-bit vectors, or an - // insert_subvector building a 256-bit AllOnes vector. - if (!ISD::isBuildVectorAllOnes(N01.getNode())) { - if (!VT.is256BitVector() || N01->getOpcode() != ISD::INSERT_SUBVECTOR) - return SDValue(); + if (!ISD::isBuildVectorAllOnes(N01.getNode())) + return SDValue(); - SDValue V1 = N01->getOperand(0); - SDValue V2 = N01->getOperand(1); - if (V1.getOpcode() != ISD::INSERT_SUBVECTOR || - !V1.getOperand(0).isUndef() || - !ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) || - !ISD::isBuildVectorAllOnes(V2.getNode())) - return SDValue(); - } return DAG.getNode(X86ISD::ANDNP, DL, VT, N00, N1); } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 81a61e7975c..922fa69343b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -491,7 +491,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, [(set VR256:$dst, (v8i32 immAllOnesV))]>; } - //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move FP Scalar Instructions // @@ -7862,6 +7861,15 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; } + +// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit +// all ones value. +let Predicates = [HasAVX1Only] in +def : Pat<(v8i32 immAllOnesV), + (VINSERTF128rr + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm), + (V_SETALLONES), 1)>; + multiclass vinsert_lowering { def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),