1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

X86: improve (V)PMADDWD detection

In combineMulToPMADDWD, the optimization can sometimes also be applied
when the upper 17 bits of both operands are sign bits, not only when they are zero bits.
For now, detect the case where both operands are SRA nodes and replace them with SRL.
This commit is contained in:
Nekotekina 2021-05-13 16:58:05 +03:00
parent 8ed5423cd2
commit a7dd06b0f0

View File

@ -41659,6 +41659,22 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (N0.getOpcode() == ISD::SRA && N1.getOpcode() == ISD::SRA) {
// If both arguments are sign-extended, try to replace sign extends
// with zero extends, which should qualify for the optimization.
// Otherwise just fallback to zero-extension check.
if (isa<ConstantSDNode>(N0.getOperand(1).getOperand(0)) &&
N0.getOperand(1).getConstantOperandVal(0) == 16 &&
isa<ConstantSDNode>(N1.getOperand(1).getOperand(0)) &&
N1.getOperand(1).getConstantOperandVal(0) == 16) {
// Nullify mask to pass the following check
Mask17 = 0;
N0 = DAG.getNode(ISD::SRL, N0.getNode(), VT, N0.getOperand(0),
N0.getOperand(1));
N1 = DAG.getNode(ISD::SRL, N1.getNode(), VT, N1.getOperand(0),
N1.getOperand(1));
}
}
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
return SDValue();