1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86][SSE] combineTargetShuffle - rearrange shuffle(hop,hop) matching to delay shuffle mask manipulation. NFC.

Check that we're shuffling hadd/pack ops first before altering shuffle masks.

First step towards adding extra functionality, plus it avoids costly shuffle mask manipulation if not necessary.
This commit is contained in:
Simon Pilgrim 2020-08-10 13:06:36 +01:00
parent 1e613cfd76
commit 625f6b51ab

View File

@ -35918,36 +35918,35 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
// represents the LHS/RHS inputs for the lower/upper halves.
SmallVector<int, 16> TargetMask128;
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2 &&
isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128)) {
SmallVector<int, 16> WidenedMask128;
if (scaleShuffleElements(TargetMask128, 2, WidenedMask128)) {
assert(isUndefOrZeroOrInRange(WidenedMask128, 0, 4) && "Illegal shuffle");
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
EVT VT0 = BC0.getValueType();
EVT VT1 = BC1.getValueType();
unsigned Opcode0 = BC0.getOpcode();
unsigned Opcode1 = BC1.getOpcode();
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
if (Opcode0 == Opcode1 && VT0 == VT1 &&
(isHoriz || Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) {
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2) {
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
EVT VT0 = BC0.getValueType();
EVT VT1 = BC1.getValueType();
unsigned Opcode0 = BC0.getOpcode();
unsigned Opcode1 = BC1.getOpcode();
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
if (Opcode0 == Opcode1 && VT0 == VT1 && (isHoriz || isPack)) {
SmallVector<int, 16> TargetMask128, WideMask128;
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
bool SingleOp = (TargetOps.size() == 1);
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
SDValue Lo = isInRange(WidenedMask128[0], 0, 2) ? BC0 : BC1;
SDValue Hi = isInRange(WidenedMask128[1], 0, 2) ? BC0 : BC1;
Lo = Lo.getOperand(WidenedMask128[0] & 1);
Hi = Hi.getOperand(WidenedMask128[1] & 1);
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
Lo = Lo.getOperand(WideMask128[0] & 1);
Hi = Hi.getOperand(WideMask128[1] & 1);
if (SingleOp) {
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
SDValue Undef = DAG.getUNDEF(SrcVT);
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
Lo = (WidenedMask128[0] == SM_SentinelZero ? Zero : Lo);
Hi = (WidenedMask128[1] == SM_SentinelZero ? Zero : Hi);
Lo = (WidenedMask128[0] == SM_SentinelUndef ? Undef : Lo);
Hi = (WidenedMask128[1] == SM_SentinelUndef ? Undef : Hi);
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
}
SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
return DAG.getBitcast(VT, Horiz);