diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dade38c0538..e4fcafc3352 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7982,7 +7982,7 @@ static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, } // Use PINSRB/PINSRW/PINSRD to create a build vector. -static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, +static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -7997,7 +7997,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, bool First = true; for (unsigned i = 0; i < NumElts; ++i) { - bool IsNonZero = (NonZeros & (1 << i)) != 0; + bool IsNonZero = NonZeroMask[i]; if (!IsNonZero) continue; @@ -8024,7 +8024,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, } /// Custom lower build_vector of v16i8. -static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, +static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -8033,7 +8033,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, // SSE4.1 - use PINSRB to insert each byte directly. if (Subtarget.hasSSE41()) - return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, + return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, Subtarget); SDLoc dl(Op); @@ -8041,8 +8041,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; i += 2) { - bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; - bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0; + bool ThisIsNonZero = NonZeroMask[i]; + bool NextIsNonZero = NonZeroMask[i + 1]; if (!ThisIsNonZero && !NextIsNonZero) continue; @@ -8090,7 +8090,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } /// Custom lower build_vector of v8i16. -static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, +static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -8098,7 +8098,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, return SDValue(); // Use PINSRW to insert each byte directly. - return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, + return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, Subtarget); } @@ -10176,10 +10176,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return BitOp; unsigned EVTBits = EltVT.getSizeInBits(); - - unsigned NumZero = 0; - unsigned NumNonZero = 0; - uint64_t NonZeros = 0; + APInt ZeroMask = APInt::getNullValue(NumElems); + APInt NonZeroMask = APInt::getNullValue(NumElems); bool IsAllConstants = true; SmallSet Values; unsigned NumConstants = NumElems; @@ -10192,15 +10190,16 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { IsAllConstants = false; NumConstants--; } - if (X86::isZeroNode(Elt)) - NumZero++; - else { - assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. - NonZeros |= ((uint64_t)1 << i); - NumNonZero++; + if (X86::isZeroNode(Elt)) { + ZeroMask.setBit(i); + } else { + NonZeroMask.setBit(i); } } + unsigned NumZero = ZeroMask.countPopulation(); + unsigned NumNonZero = NonZeroMask.countPopulation(); + // All undef vector. Return an UNDEF. All zero vectors were handled above. if (NumNonZero == 0) return DAG.getUNDEF(VT); @@ -10267,7 +10266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Special case for single non-zero, non-undef, element. if (NumNonZero == 1) { - unsigned Idx = countTrailingZeros(NonZeros); + unsigned Idx = NonZeroMask.countTrailingZeros(); SDValue Item = Op.getOperand(Idx); // If we have a constant or non-constant insertion into the low element of @@ -10331,7 +10330,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> // Check if it's possible to issue this instead. // shuffle (vload ptr)), undef, <1, 1, 1, 1> - unsigned Idx = countTrailingZeros(NonZeros); + unsigned Idx = NonZeroMask.countTrailingZeros(); SDValue Item = Op.getOperand(Idx); if (Op.getNode()->isOnlyUserOf(Item.getNode())) return LowerAsSplatVectorLoad(Item, VT, dl, DAG); @@ -10400,7 +10399,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (EVTBits == 64) { if (NumNonZero == 1) { // One half is zero or undef. - unsigned Idx = countTrailingZeros(NonZeros); + unsigned Idx = NonZeroMask.countTrailingZeros(); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); @@ -10410,12 +10409,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) - if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, + if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero, DAG, Subtarget)) return V; if (EVTBits == 16 && NumElems == 8) - if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, + if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero, DAG, Subtarget)) return V; @@ -10428,7 +10427,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (NumElems == 4 && NumZero > 0) { SmallVector Ops(NumElems); for (unsigned i = 0; i < 4; ++i) { - bool isZero = !(NonZeros & (1ULL << i)); + bool isZero = !NonZeroMask[i]; if (isZero) Ops[i] = getZeroVector(VT, Subtarget, DAG, dl); else @@ -10436,7 +10435,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } for (unsigned i = 0; i < 2; ++i) { - switch ((NonZeros >> (i*2)) & 0x3) { + switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) { default: llvm_unreachable("Unexpected NonZero count"); case 0: Ops[i] = Ops[i*2]; // Must be a zero vector. @@ -10453,8 +10452,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } } - bool Reverse1 = (NonZeros & 0x3) == 2; - bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; + bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2; + bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2; int MaskVec[] = { Reverse1 ? 1 : 0, Reverse1 ? 0 : 1,