mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[x86] Add another combine that is particularly useful for the new vector shuffle lowering: match shuffle patterns equivalent to an unpcklwd or unpckhwd instruction.

This allows us to use generic lowering code for v8i16 shuffles and match the unpack pattern late.

llvm-svn: 212705
This commit is contained in:
parent
e97885896b
commit
b0b7a4016c
@ -18694,6 +18694,47 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
|
||||
}
|
||||
|
||||
// Look for shuffle patterns which can be implemented as a single unpack.
|
||||
// FIXME: This doesn't handle the location of the PSHUFD generically, and
|
||||
// only works when we have a PSHUFD followed by two half-shuffles.
|
||||
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
|
||||
(V.getOpcode() == X86ISD::PSHUFLW ||
|
||||
V.getOpcode() == X86ISD::PSHUFHW) &&
|
||||
V.getOpcode() != N.getOpcode() &&
|
||||
V.hasOneUse()) {
|
||||
SDValue D = V.getOperand(0);
|
||||
while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
|
||||
D = D.getOperand(0);
|
||||
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
|
||||
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
|
||||
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
|
||||
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
|
||||
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
|
||||
int WordMask[8];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
WordMask[i + NOffset] = Mask[i] + NOffset;
|
||||
WordMask[i + VOffset] = VMask[i] + VOffset;
|
||||
}
|
||||
// Map the word mask through the DWord mask.
|
||||
int MappedMask[8];
|
||||
for (int i = 0; i < 8; ++i)
|
||||
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
|
||||
const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
|
||||
const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
|
||||
if (std::equal(std::begin(MappedMask), std::end(MappedMask),
|
||||
std::begin(UnpackLoMask)) ||
|
||||
std::equal(std::begin(MappedMask), std::end(MappedMask),
|
||||
std::begin(UnpackHiMask))) {
|
||||
// We can replace all three shuffles with an unpack.
|
||||
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
|
||||
DCI.AddToWorklist(V.getNode());
|
||||
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
|
||||
: X86ISD::UNPCKH,
|
||||
DL, MVT::v8i16, V, V);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case X86ISD::PSHUFD:
|
||||
|
@ -44,14 +44,22 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(
|
||||
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
|
||||
; CHECK-SSE2: # BB#0:
|
||||
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7]
|
||||
; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
|
||||
ret <16 x i8> %shuffle
|
||||
}
|
||||
|
||||
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
|
||||
; CHECK-SSE2: # BB#0:
|
||||
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
|
||||
ret <16 x i8> %shuffle
|
||||
}
|
||||
|
||||
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
|
||||
; CHECK-SSE2: # BB#0:
|
||||
|
Loading…
Reference in New Issue
Block a user