1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

X86: improve (V)PMADDWD detection

In combineMulToPMADDWD, the optimization can sometimes still be applied
when the upper 17 bits are sign bits rather than only when they are zero bits.
For now, detect the case where both operands are SRA nodes and replace them with SRL.
This commit is contained in:
Nekotekina 2021-05-13 16:58:05 +03:00
parent c36b21c023
commit f7d625e31a

View File

@ -43531,6 +43531,22 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (N0.getOpcode() == ISD::SRA && N1.getOpcode() == ISD::SRA) {
// If both arguments are sign-extended, try to replace sign extends
// with zero extends, which should qualify for the optimization.
// Otherwise just fallback to zero-extension check.
if (isa<ConstantSDNode>(N0.getOperand(1).getOperand(0)) &&
N0.getOperand(1).getConstantOperandVal(0) == 16 &&
isa<ConstantSDNode>(N1.getOperand(1).getOperand(0)) &&
N1.getOperand(1).getConstantOperandVal(0) == 16) {
// Nullify mask to pass the following check
Mask17 = 0;
N0 = DAG.getNode(ISD::SRL, N0.getNode(), VT, N0.getOperand(0),
N0.getOperand(1));
N1 = DAG.getNode(ISD::SRL, N1.getNode(), VT, N1.getOperand(0),
N1.getOperand(1));
}
}
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
return SDValue();