1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[x86] Teach a bunch of the x86-specific shuffle combining to work with

256-bit vectors as well as 128-bit vectors. Fixes some of the redundant
shuffles for v16i16.

llvm-svn: 230752
This commit is contained in:
Chandler Carruth 2015-02-27 11:45:13 +00:00
parent a1c6bfd527
commit 36698dfd52
2 changed files with 29 additions and 15 deletions

View File

@ -19435,12 +19435,26 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
/// This is a very minor wrapper around getTargetShuffleMask to easy forming v4
/// PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
// If we have more than 128-bits, only the low 128-bits of shuffle mask
// matter. Check that the upper masks are repeats and remove them.
if (VT.getSizeInBits() > 128) {
int LaneElts = 128 / VT.getScalarSizeInBits();
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
"Mask doesn't repeat in high 128-bit lanes!");
#endif
Mask.resize(LaneElts);
}
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
@ -19513,7 +19527,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
V.getSimpleValueType().getScalarType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@ -19687,8 +19702,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
assert(VT == MVT::v8i16);
(void)VT;
assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@ -19701,12 +19715,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DAG));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
return DAG.getNode(ISD::BITCAST, DL, VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
@ -19741,11 +19756,11 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
std::equal(std::begin(MappedMask), std::end(MappedMask),
std::begin(UnpackHiMask))) {
// We can replace all three shuffles with an unpack.
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, MVT::v8i16, V, V);
DL, VT, V, V);
}
}
}
@ -19893,10 +19908,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
}
}
// Only handle 128 wide vector from here on.
if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
@ -19914,6 +19925,10 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
if (Shuffle.getNode())
return Shuffle;
// Only handle 128 wide vector from here on.
if (!VT.is128BitVector())
return SDValue();
// Try recursively combining arbitrary sequences of x86 shuffle
// instructions into higher-order shuffles. We do this after combining
// specific PSHUF instruction sequences into their minimal form so that we

View File

@ -2380,8 +2380,7 @@ define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_u
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,1,2,0,7,5,6,4]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,3,2,3,4,5,6,7,8,11,10,11,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,10,11,8,9,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,0,2,3,6,4,6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle