From f7d625e31a62f131ac785a17818b0f7ae7951360 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Thu, 13 May 2021 16:58:05 +0300
Subject: [PATCH] X86: improve (V)PMADDWD detection

In function combineMulToPMADDWD, the optimization can sometimes still be
applied when the upper 17 bits are sign bits, not just zero bits.
For now, detect a pair of SRA operands and replace them with SRL.
---
 lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 193a11a7a5e..c097abd3041 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -43531,6 +43531,22 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   APInt Mask17 = APInt::getHighBitsSet(32, 17);
+  if (N0.getOpcode() == ISD::SRA && N1.getOpcode() == ISD::SRA) {
+    // If both arguments are sign-extended, try to replace sign extends
+    // with zero extends, which should qualify for the optimization.
+    // Otherwise just fallback to zero-extension check.
+    if (isa<ConstantSDNode>(N0.getOperand(1).getOperand(0)) &&
+        N0.getOperand(1).getConstantOperandVal(0) == 16 &&
+        isa<ConstantSDNode>(N1.getOperand(1).getOperand(0)) &&
+        N1.getOperand(1).getConstantOperandVal(0) == 16) {
+      // Nullify mask to pass the following check
+      Mask17 = 0;
+      N0 = DAG.getNode(ISD::SRL, N0.getNode(), VT, N0.getOperand(0),
+                       N0.getOperand(1));
+      N1 = DAG.getNode(ISD::SRL, N1.getNode(), VT, N1.getOperand(0),
+                       N1.getOperand(1));
+    }
+  }
   if (!DAG.MaskedValueIsZero(N1, Mask17) ||
       !DAG.MaskedValueIsZero(N0, Mask17))
     return SDValue();