[AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy
This follows on from D89558, which added the new intrinsic, and D88955, which added similar combines for llvm.amdgcn.fmul.legacy.

Differential Revision: https://reviews.llvm.org/D90028
commit 9321aed101
parent 91be48b03e
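
For illustration only (not part of the commit), a minimal IR sketch of the two new folds, mirroring the tests added below; the function names @fold_zero and @fold_to_fma are placeholders.

; Illustrative only: what instcombine now does with llvm.amdgcn.fma.legacy.
declare float @llvm.amdgcn.fma.legacy(float, float, float)

define float @fold_zero(float %x, float %z) {
  ; One multiplicand is +/-0.0: folds to  fadd float %z, 0.000000e+00
  ; (not to a bare %z, so a -0.0 %z still becomes +0.0).
  %r = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
  ret float %r
}

define float @fold_to_fma(float %x, float %z) {
  ; One multiplicand is a finite non-zero constant: becomes
  ; call float @llvm.fma.f32(float %x, float 9.950000e+01, float %z).
  %r = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
  ret float %r
}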
@@ -163,6 +163,27 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   return IC.replaceInstUsesWith(II, NewCall);
 }
 
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+                                           InstCombiner &IC) const {
+  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+  // infinity, gives +0.0. If we can prove we don't have one of the special
+  // cases then we can use a normal multiply instead.
+  // TODO: Create and use isKnownFiniteNonZero instead of just matching
+  // constants here.
+  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+      match(Op1, PatternMatch::m_FiniteNonZero())) {
+    // One operand is not zero or infinity or NaN.
+    return true;
+  }
+  auto *TLI = &IC.getTargetLibraryInfo();
+  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+      isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+    // Neither operand is infinity or NaN.
+    return true;
+  }
+  return false;
+}
+
 Optional<Instruction *>
 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   Intrinsic::ID IID = II.getIntrinsicID();
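
An illustrative aside (not part of the patch) on why canSimplifyLegacyMulToMul must guard the rewrite: the legacy semantics differ from IEEE exactly in the zero-times-special cases. The function name @legacy_vs_ieee is a placeholder.

; Illustrative only: why the guard is needed before rewriting legacy mul/fma.
declare float @llvm.amdgcn.fmul.legacy(float, float)

define float @legacy_vs_ieee() {
  ; Legacy semantics: +/-0.0 times anything, even infinity or NaN, is +0.0,
  ; so this call evaluates to +0.0 ...
  %a = call float @llvm.amdgcn.fmul.legacy(float 0.0, float 0x7FF0000000000000)
  ; ... whereas the IEEE product 0.0 * +inf is NaN, so a plain fmul is not
  ; equivalent unless the special cases are ruled out first.
  %b = fmul float 0.0, 0x7FF0000000000000
  %c = fadd float %a, %b
  ret float %c
}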
@@ -836,26 +857,40 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
     // If we can prove we don't have one of the special cases then we can use a
     // normal fmul instruction instead.
-    auto *TLI = &IC.getTargetLibraryInfo();
-    bool CanSimplifyToMul = false;
-    // TODO: Create and use isKnownFiniteNonZero instead of just matching
-    // constants here.
-    if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
-        match(Op1, PatternMatch::m_FiniteNonZero())) {
-      // One operand is not zero or infinity or NaN.
-      CanSimplifyToMul = true;
-    } else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
-               isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
-      // Neither operand is infinity or NaN.
-      CanSimplifyToMul = true;
-    }
-    if (CanSimplifyToMul) {
+    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
       FMul->takeName(&II);
       return IC.replaceInstUsesWith(II, FMul);
     }
     break;
   }
+  case Intrinsic::amdgcn_fma_legacy: {
+    Value *Op0 = II.getArgOperand(0);
+    Value *Op1 = II.getArgOperand(1);
+    Value *Op2 = II.getArgOperand(2);
+
+    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+    // infinity, gives +0.0.
+    // TODO: Move to InstSimplify?
+    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+        match(Op1, PatternMatch::m_AnyZeroFP())) {
+      // It's tempting to just return Op2 here, but that would give the wrong
+      // result if Op2 was -0.0.
+      auto *Zero = ConstantFP::getNullValue(II.getType());
+      auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
+      FAdd->takeName(&II);
+      return IC.replaceInstUsesWith(II, FAdd);
+    }
+
+    // If we can prove we don't have one of the special cases then we can use a
+    // normal fma instead.
+    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+      II.setCalledOperand(Intrinsic::getDeclaration(
+          II.getModule(), Intrinsic::fma, II.getType()));
+      return &II;
+    }
+    break;
+  }
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
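
A worked aside (not part of the diff) on the signed-zero subtlety mentioned in the comment above: when a multiplicand is +/-0.0 the whole call cannot simply be replaced with Op2. The function name @keep_the_fadd is a placeholder.

; Illustrative only: why the zero case emits an fadd instead of returning %z.
define float @keep_the_fadd(float %z) {
  ; If %z is -0.0, returning %z would give -0.0, but the legacy fma computes
  ; (+0.0 * x) + %z, and under the default rounding mode +0.0 + -0.0 is +0.0.
  %r = fadd float %z, 0.000000e+00
  ; Only with the nsz flag (see test_zero_nsz below) is it valid to drop the
  ; fadd and return %z directly.
  ret float %r
}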
@@ -227,6 +227,8 @@ public:
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const;
 
+  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+                                 InstCombiner &IC) const;
   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                IntrinsicInst &II) const;
   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
test/Transforms/InstCombine/AMDGPU/fma_legacy.ll (new file, 86 lines)
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; Simplify to +0.0 + z.
+define float @test_zero(float %x, float %z) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to +0.0 + z, preserving fmf.
+define float @test_zero_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_zero_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd contract float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to z.
+define float @test_zero_nsz(float %x, float %z) {
+; CHECK-LABEL: @test_zero_nsz(
+; CHECK-NEXT:    ret float [[Z:%.*]]
+;
+  %call = call nsz float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to +0.0 + z.
+define float @test_negzero(float %y, float %z) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+  ret float %call
+}
+
+; Simplify to z.
+define float @test_negzero_nsz(float %y, float %z) {
+; CHECK-LABEL: @test_negzero_nsz(
+; CHECK-NEXT:    ret float [[Z:%.*]]
+;
+  %call = call nsz float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+  ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero.
+define float @test_const(float %x, float %z) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT:    [[CALL:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+  ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = call contract float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+  ret float %call
+}
+
+; Combine to fma because neither argument can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y, float %z) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT:    [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[CALL:%.*]] = call float @llvm.fma.f32(float [[XF]], float [[YF]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %xf = sitofp i32 %x to float
+  %yf = sitofp i32 %y to float
+  %call = call float @llvm.amdgcn.fma.legacy(float %xf, float %yf, float %z)
+  ret float %call
+}
+
+declare float @llvm.amdgcn.fma.legacy(float, float, float)
@@ -29,6 +29,16 @@ define float @test_const(float %x) {
   ret float %call
 }
 
+; Combine to fmul because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = fmul contract float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+  ret float %call
+}
+
 ; Combine to fmul because neither argument can be infinity or NaN.
 define float @test_finite(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test_finite(