mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[X86] LowerBUILD_VECTOR - track zero/nonzero elements with APInt masks. NFCI.
Prep work for undef/zero 'upper elements' handling as proposed in D92645.
This commit is contained in:
parent
97b075a61c
commit
ee5f2e29f2
@ -7982,7 +7982,7 @@ static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Use PINSRB/PINSRW/PINSRD to create a build vector.
|
// Use PINSRB/PINSRW/PINSRD to create a build vector.
|
||||||
static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
|
static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
|
||||||
unsigned NumNonZero, unsigned NumZero,
|
unsigned NumNonZero, unsigned NumZero,
|
||||||
SelectionDAG &DAG,
|
SelectionDAG &DAG,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
@ -7997,7 +7997,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
|
|||||||
bool First = true;
|
bool First = true;
|
||||||
|
|
||||||
for (unsigned i = 0; i < NumElts; ++i) {
|
for (unsigned i = 0; i < NumElts; ++i) {
|
||||||
bool IsNonZero = (NonZeros & (1 << i)) != 0;
|
bool IsNonZero = NonZeroMask[i];
|
||||||
if (!IsNonZero)
|
if (!IsNonZero)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -8024,7 +8024,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Custom lower build_vector of v16i8.
|
/// Custom lower build_vector of v16i8.
|
||||||
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
|
||||||
unsigned NumNonZero, unsigned NumZero,
|
unsigned NumNonZero, unsigned NumZero,
|
||||||
SelectionDAG &DAG,
|
SelectionDAG &DAG,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
@ -8033,7 +8033,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||||||
|
|
||||||
// SSE4.1 - use PINSRB to insert each byte directly.
|
// SSE4.1 - use PINSRB to insert each byte directly.
|
||||||
if (Subtarget.hasSSE41())
|
if (Subtarget.hasSSE41())
|
||||||
return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
|
return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
|
||||||
Subtarget);
|
Subtarget);
|
||||||
|
|
||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
@ -8041,8 +8041,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||||||
|
|
||||||
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
|
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
|
||||||
for (unsigned i = 0; i < 16; i += 2) {
|
for (unsigned i = 0; i < 16; i += 2) {
|
||||||
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
|
bool ThisIsNonZero = NonZeroMask[i];
|
||||||
bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
|
bool NextIsNonZero = NonZeroMask[i + 1];
|
||||||
if (!ThisIsNonZero && !NextIsNonZero)
|
if (!ThisIsNonZero && !NextIsNonZero)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -8090,7 +8090,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Custom lower build_vector of v8i16.
|
/// Custom lower build_vector of v8i16.
|
||||||
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
|
static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
|
||||||
unsigned NumNonZero, unsigned NumZero,
|
unsigned NumNonZero, unsigned NumZero,
|
||||||
SelectionDAG &DAG,
|
SelectionDAG &DAG,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
@ -8098,7 +8098,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Use PINSRW to insert each 16-bit element directly.
|
// Use PINSRW to insert each 16-bit element directly.
|
||||||
return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
|
return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
|
||||||
Subtarget);
|
Subtarget);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10176,10 +10176,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return BitOp;
|
return BitOp;
|
||||||
|
|
||||||
unsigned EVTBits = EltVT.getSizeInBits();
|
unsigned EVTBits = EltVT.getSizeInBits();
|
||||||
|
APInt ZeroMask = APInt::getNullValue(NumElems);
|
||||||
unsigned NumZero = 0;
|
APInt NonZeroMask = APInt::getNullValue(NumElems);
|
||||||
unsigned NumNonZero = 0;
|
|
||||||
uint64_t NonZeros = 0;
|
|
||||||
bool IsAllConstants = true;
|
bool IsAllConstants = true;
|
||||||
SmallSet<SDValue, 8> Values;
|
SmallSet<SDValue, 8> Values;
|
||||||
unsigned NumConstants = NumElems;
|
unsigned NumConstants = NumElems;
|
||||||
@ -10192,15 +10190,16 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
IsAllConstants = false;
|
IsAllConstants = false;
|
||||||
NumConstants--;
|
NumConstants--;
|
||||||
}
|
}
|
||||||
if (X86::isZeroNode(Elt))
|
if (X86::isZeroNode(Elt)) {
|
||||||
NumZero++;
|
ZeroMask.setBit(i);
|
||||||
else {
|
} else {
|
||||||
assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
|
NonZeroMask.setBit(i);
|
||||||
NonZeros |= ((uint64_t)1 << i);
|
|
||||||
NumNonZero++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned NumZero = ZeroMask.countPopulation();
|
||||||
|
unsigned NumNonZero = NonZeroMask.countPopulation();
|
||||||
|
|
||||||
// All undef vector. Return an UNDEF. All zero vectors were handled above.
|
// All undef vector. Return an UNDEF. All zero vectors were handled above.
|
||||||
if (NumNonZero == 0)
|
if (NumNonZero == 0)
|
||||||
return DAG.getUNDEF(VT);
|
return DAG.getUNDEF(VT);
|
||||||
@ -10267,7 +10266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
// Special case for single non-zero, non-undef, element.
|
// Special case for single non-zero, non-undef, element.
|
||||||
if (NumNonZero == 1) {
|
if (NumNonZero == 1) {
|
||||||
unsigned Idx = countTrailingZeros(NonZeros);
|
unsigned Idx = NonZeroMask.countTrailingZeros();
|
||||||
SDValue Item = Op.getOperand(Idx);
|
SDValue Item = Op.getOperand(Idx);
|
||||||
|
|
||||||
// If we have a constant or non-constant insertion into the low element of
|
// If we have a constant or non-constant insertion into the low element of
|
||||||
@ -10331,7 +10330,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
|
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
|
||||||
// Check if it's possible to issue this instead.
|
// Check if it's possible to issue this instead.
|
||||||
// shuffle (vload ptr), undef, <1, 1, 1, 1>
|
// shuffle (vload ptr), undef, <1, 1, 1, 1>
|
||||||
unsigned Idx = countTrailingZeros(NonZeros);
|
unsigned Idx = NonZeroMask.countTrailingZeros();
|
||||||
SDValue Item = Op.getOperand(Idx);
|
SDValue Item = Op.getOperand(Idx);
|
||||||
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
|
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
|
||||||
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
|
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
|
||||||
@ -10400,7 +10399,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
if (EVTBits == 64) {
|
if (EVTBits == 64) {
|
||||||
if (NumNonZero == 1) {
|
if (NumNonZero == 1) {
|
||||||
// One half is zero or undef.
|
// One half is zero or undef.
|
||||||
unsigned Idx = countTrailingZeros(NonZeros);
|
unsigned Idx = NonZeroMask.countTrailingZeros();
|
||||||
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
|
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
|
||||||
Op.getOperand(Idx));
|
Op.getOperand(Idx));
|
||||||
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
|
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
|
||||||
@ -10410,12 +10409,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
// If element VT is < 32 bits, convert it to inserts into a zero vector.
|
// If element VT is < 32 bits, convert it to inserts into a zero vector.
|
||||||
if (EVTBits == 8 && NumElems == 16)
|
if (EVTBits == 8 && NumElems == 16)
|
||||||
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
|
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
|
||||||
DAG, Subtarget))
|
DAG, Subtarget))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
if (EVTBits == 16 && NumElems == 8)
|
if (EVTBits == 16 && NumElems == 8)
|
||||||
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
|
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
|
||||||
DAG, Subtarget))
|
DAG, Subtarget))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
@ -10428,7 +10427,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
if (NumElems == 4 && NumZero > 0) {
|
if (NumElems == 4 && NumZero > 0) {
|
||||||
SmallVector<SDValue, 8> Ops(NumElems);
|
SmallVector<SDValue, 8> Ops(NumElems);
|
||||||
for (unsigned i = 0; i < 4; ++i) {
|
for (unsigned i = 0; i < 4; ++i) {
|
||||||
bool isZero = !(NonZeros & (1ULL << i));
|
bool isZero = !NonZeroMask[i];
|
||||||
if (isZero)
|
if (isZero)
|
||||||
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
|
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
|
||||||
else
|
else
|
||||||
@ -10436,7 +10435,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; ++i) {
|
for (unsigned i = 0; i < 2; ++i) {
|
||||||
switch ((NonZeros >> (i*2)) & 0x3) {
|
switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
|
||||||
default: llvm_unreachable("Unexpected NonZero count");
|
default: llvm_unreachable("Unexpected NonZero count");
|
||||||
case 0:
|
case 0:
|
||||||
Ops[i] = Ops[i*2]; // Must be a zero vector.
|
Ops[i] = Ops[i*2]; // Must be a zero vector.
|
||||||
@ -10453,8 +10452,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Reverse1 = (NonZeros & 0x3) == 2;
|
bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
|
||||||
bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
|
bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
|
||||||
int MaskVec[] = {
|
int MaskVec[] = {
|
||||||
Reverse1 ? 1 : 0,
|
Reverse1 ? 1 : 0,
|
||||||
Reverse1 ? 0 : 1,
|
Reverse1 ? 0 : 1,
|
||||||
|
Loading…
Reference in New Issue
Block a user