mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[X86][SSE] combineTargetShuffle - rearrange shuffle(hop,hop) matching to delay shuffle mask manipulation. NFC.
Check that we're shuffling hadd/pack ops first before altering shuffle masks. First step towards adding extra functionality, plus it avoids costly shuffle mask manipulation if not necessary.
This commit is contained in:
parent
1e613cfd76
commit
625f6b51ab
@ -35918,36 +35918,35 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
||||
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
|
||||
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
|
||||
// represents the LHS/RHS inputs for the lower/upper halves.
|
||||
SmallVector<int, 16> TargetMask128;
|
||||
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2 &&
|
||||
isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128)) {
|
||||
SmallVector<int, 16> WidenedMask128;
|
||||
if (scaleShuffleElements(TargetMask128, 2, WidenedMask128)) {
|
||||
assert(isUndefOrZeroOrInRange(WidenedMask128, 0, 4) && "Illegal shuffle");
|
||||
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
|
||||
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
|
||||
EVT VT0 = BC0.getValueType();
|
||||
EVT VT1 = BC1.getValueType();
|
||||
unsigned Opcode0 = BC0.getOpcode();
|
||||
unsigned Opcode1 = BC1.getOpcode();
|
||||
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
|
||||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
|
||||
if (Opcode0 == Opcode1 && VT0 == VT1 &&
|
||||
(isHoriz || Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) {
|
||||
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2) {
|
||||
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
|
||||
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
|
||||
EVT VT0 = BC0.getValueType();
|
||||
EVT VT1 = BC1.getValueType();
|
||||
unsigned Opcode0 = BC0.getOpcode();
|
||||
unsigned Opcode1 = BC1.getOpcode();
|
||||
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
|
||||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
|
||||
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
|
||||
if (Opcode0 == Opcode1 && VT0 == VT1 && (isHoriz || isPack)) {
|
||||
SmallVector<int, 16> TargetMask128, WideMask128;
|
||||
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
|
||||
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
|
||||
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
|
||||
bool SingleOp = (TargetOps.size() == 1);
|
||||
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
|
||||
SDValue Lo = isInRange(WidenedMask128[0], 0, 2) ? BC0 : BC1;
|
||||
SDValue Hi = isInRange(WidenedMask128[1], 0, 2) ? BC0 : BC1;
|
||||
Lo = Lo.getOperand(WidenedMask128[0] & 1);
|
||||
Hi = Hi.getOperand(WidenedMask128[1] & 1);
|
||||
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
|
||||
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
|
||||
Lo = Lo.getOperand(WideMask128[0] & 1);
|
||||
Hi = Hi.getOperand(WideMask128[1] & 1);
|
||||
if (SingleOp) {
|
||||
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
|
||||
SDValue Undef = DAG.getUNDEF(SrcVT);
|
||||
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
|
||||
Lo = (WidenedMask128[0] == SM_SentinelZero ? Zero : Lo);
|
||||
Hi = (WidenedMask128[1] == SM_SentinelZero ? Zero : Hi);
|
||||
Lo = (WidenedMask128[0] == SM_SentinelUndef ? Undef : Lo);
|
||||
Hi = (WidenedMask128[1] == SM_SentinelUndef ? Undef : Hi);
|
||||
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
|
||||
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
|
||||
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
|
||||
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
|
||||
}
|
||||
SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
|
||||
return DAG.getBitcast(VT, Horiz);
|
||||
|
Loading…
x
Reference in New Issue
Block a user