1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[X86] LowerBUILD_VECTOR - track zero/nonzero elements with APInt masks. NFCI.

Prep work for undef/zero 'upper elements' handling as proposed in D92645.
This commit is contained in:
Simon Pilgrim 2020-12-14 14:23:27 +00:00
parent 97b075a61c
commit ee5f2e29f2

View File

@ -7982,7 +7982,7 @@ static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
} }
// Use PINSRB/PINSRW/PINSRD to create a build vector. // Use PINSRB/PINSRW/PINSRD to create a build vector.
static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero, unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG, SelectionDAG &DAG,
const X86Subtarget &Subtarget) { const X86Subtarget &Subtarget) {
@ -7997,7 +7997,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
bool First = true; bool First = true;
for (unsigned i = 0; i < NumElts; ++i) { for (unsigned i = 0; i < NumElts; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0; bool IsNonZero = NonZeroMask[i];
if (!IsNonZero) if (!IsNonZero)
continue; continue;
@ -8024,7 +8024,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
} }
/// Custom lower build_vector of v16i8. /// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero, unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG, SelectionDAG &DAG,
const X86Subtarget &Subtarget) { const X86Subtarget &Subtarget) {
@ -8033,7 +8033,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// SSE4.1 - use PINSRB to insert each byte directly. // SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41()) if (Subtarget.hasSSE41())
return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget); Subtarget);
SDLoc dl(Op); SDLoc dl(Op);
@ -8041,8 +8041,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW. // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; i += 2) { for (unsigned i = 0; i < 16; i += 2) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; bool ThisIsNonZero = NonZeroMask[i];
bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0; bool NextIsNonZero = NonZeroMask[i + 1];
if (!ThisIsNonZero && !NextIsNonZero) if (!ThisIsNonZero && !NextIsNonZero)
continue; continue;
@ -8090,7 +8090,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
} }
/// Custom lower build_vector of v8i16. /// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero, unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG, SelectionDAG &DAG,
const X86Subtarget &Subtarget) { const X86Subtarget &Subtarget) {
@ -8098,7 +8098,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return SDValue(); return SDValue();
// Use PINSRW to insert each word directly. // Use PINSRW to insert each word directly.
return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget); Subtarget);
} }
@ -10176,10 +10176,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return BitOp; return BitOp;
unsigned EVTBits = EltVT.getSizeInBits(); unsigned EVTBits = EltVT.getSizeInBits();
APInt ZeroMask = APInt::getNullValue(NumElems);
unsigned NumZero = 0; APInt NonZeroMask = APInt::getNullValue(NumElems);
unsigned NumNonZero = 0;
uint64_t NonZeros = 0;
bool IsAllConstants = true; bool IsAllConstants = true;
SmallSet<SDValue, 8> Values; SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems; unsigned NumConstants = NumElems;
@ -10192,15 +10190,16 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
IsAllConstants = false; IsAllConstants = false;
NumConstants--; NumConstants--;
} }
if (X86::isZeroNode(Elt)) if (X86::isZeroNode(Elt)) {
NumZero++; ZeroMask.setBit(i);
else { } else {
assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. NonZeroMask.setBit(i);
NonZeros |= ((uint64_t)1 << i);
NumNonZero++;
} }
} }
unsigned NumZero = ZeroMask.countPopulation();
unsigned NumNonZero = NonZeroMask.countPopulation();
// All undef vector. Return an UNDEF. All zero vectors were handled above. // All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NumNonZero == 0) if (NumNonZero == 0)
return DAG.getUNDEF(VT); return DAG.getUNDEF(VT);
@ -10267,7 +10266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Special case for single non-zero, non-undef, element. // Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) { if (NumNonZero == 1) {
unsigned Idx = countTrailingZeros(NonZeros); unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx); SDValue Item = Op.getOperand(Idx);
// If we have a constant or non-constant insertion into the low element of // If we have a constant or non-constant insertion into the low element of
@ -10331,7 +10330,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// Check if it's possible to issue this instead. // Check if it's possible to issue this instead.
// shuffle (vload ptr)), undef, <1, 1, 1, 1> // shuffle (vload ptr)), undef, <1, 1, 1, 1>
unsigned Idx = countTrailingZeros(NonZeros); unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx); SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode())) if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG); return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
@ -10400,7 +10399,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 64) { if (EVTBits == 64) {
if (NumNonZero == 1) { if (NumNonZero == 1) {
// One half is zero or undef. // One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros); unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx)); Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
@ -10410,12 +10409,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector. // If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget)) DAG, Subtarget))
return V; return V;
if (EVTBits == 16 && NumElems == 8) if (EVTBits == 16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget)) DAG, Subtarget))
return V; return V;
@ -10428,7 +10427,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumElems == 4 && NumZero > 0) { if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems); SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) { for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1ULL << i)); bool isZero = !NonZeroMask[i];
if (isZero) if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl); Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else else
@ -10436,7 +10435,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
} }
for (unsigned i = 0; i < 2; ++i) { for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros >> (i*2)) & 0x3) { switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
default: llvm_unreachable("Unexpected NonZero count"); default: llvm_unreachable("Unexpected NonZero count");
case 0: case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector. Ops[i] = Ops[i*2]; // Must be a zero vector.
@ -10453,8 +10452,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
} }
} }
bool Reverse1 = (NonZeros & 0x3) == 2; bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
int MaskVec[] = { int MaskVec[] = {
Reverse1 ? 1 : 0, Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1, Reverse1 ? 0 : 1,