[DAGCombiner] cancel fnegs from multiplied operands of FMA

(-X) * (-Y) + Z --> X * Y + Z This is a missing optimization that shows up as a potential regression in D66050, so we should solve it first. We appear to be partly missing this fold in IR as well. We do handle the simpler case already: (-X) * (-Y) --> X * Y And it might be beneficial to make the constraint less conservative (eg, if both operands are cheap, but not necessarily cheaper), but that causes infinite looping for the existing fmul transform. Differential Revision: https://reviews.llvm.org/D66755 llvm-svn: 370071
2025-01-31 12:41:49 +01:00 · 2019-08-27 15:17:46 +00:00 · 2019-08-27 15:17:46 +00:00 · 7bb0bc7bff
commit 7bb0bc7bff
parent 24233f2415
3 changed files with 33 additions and 20 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -516,6 +516,7 @@ namespace {
    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;
+    bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
@ -12110,6 +12111,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
  return SDValue();
 }

+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
+                                   ForCodeSize))
+    if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
+                                         ForCodeSize))
+      // Both negated operands are at least as cheap as their counterparts.
+      // Check to see if at least one is cheaper negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return true;
+
+  return false;
+}
+
 SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
@ -12180,21 +12197,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

-  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
-                                       ForCodeSize)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
-                                         ForCodeSize)) {
-      // Both can be negated for free, check to see if at least one is cheaper
-      // negated.
-      if (LHSNeg == 2 || RHSNeg == 2)
-        return DAG.getNode(ISD::FMUL, DL, VT,
-                           GetNegatedExpression(N0, DAG, LegalOperations,
-                                                ForCodeSize),
-                           GetNegatedExpression(N1, DAG, LegalOperations,
-                                                ForCodeSize),
-                           Flags);
-    }
+  // -N0 * -N1 --> N0 * N1
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@ -12273,6 +12280,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

+  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+  }
+
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
--- a/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/test/CodeGen/AMDGPU/fneg-combines.ll
@ -1205,7 +1205,7 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

-; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
+; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]

 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@ -20,8 +20,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #4
 define float @fma_fneg_fneg(float %x, float %y, float %z) {
 ; CHECK-LABEL: fma_fneg_fneg:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fneg f0, f2
-; CHECK-NEXT:    fnmsubs f1, f1, f0, f3
+; CHECK-NEXT:    fmadds f1, f1, f2, f3
 ; CHECK-NEXT:    blr
  %negx = fneg float %x
  %negy = fneg float %y
@ -32,8 +31,8 @@ define float @fma_fneg_fneg(float %x, float %y, float %z) {
 define float @fma_fneg_fsub(float %x, float %y0, float %y1, float %z) {
 ; CHECK-LABEL: fma_fneg_fsub:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fsubs f0, f2, f3
-; CHECK-NEXT:    fnmsubs f1, f1, f0, f4
+; CHECK-NEXT:    fsubs f0, f3, f2
+; CHECK-NEXT:    fmadds f1, f1, f0, f4
 ; CHECK-NEXT:    blr
  %negx = fneg float %x
  %negy = fsub nsz float %y0, %y1