[ConstantFolding] Fold constrained arithmetic intrinsics

Constfold constrained variants of operations fadd, fsub, fmul, fdiv, frem, fma and fmuladd. The change also sets up some means to support for removal of unused constrained intrinsics. They are declared as accessing memory to model interaction with floating point environment, so they were not removed, as they have side effect. Now constrained intrinsics that have "fpexcept.ignore" as exception behavior are removed if they have no uses. As for intrinsics that have exception behavior other than "fpexcept.ignore", they can be removed if it is known that they do not raise floating point exceptions. It happens when doing constant folding, attributes of such intrinsic are changed so that the intrinsic is not claimed as accessing memory. Differential Revision: https://reviews.llvm.org/D102673
2024-11-25 04:02:41 +01:00 · 2021-05-04 20:43:56 +07:00 · 2021-05-04 20:43:56 +07:00 · 865c54f488
commit 865c54f488
parent 0e5d17756d
4 changed files with 322 additions and 7 deletions
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@ -1593,6 +1593,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  case Intrinsic::rint:
  // Constrained intrinsics can be folded if FP environment is known
  // to compiler.
+  case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fmuladd:
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
  case Intrinsic::experimental_constrained_ceil:
  case Intrinsic::experimental_constrained_floor:
  case Intrinsic::experimental_constrained_round:
@ -1854,6 +1861,56 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
  return false;
 }

+/// Checks if the given intrinsic call, which evaluates to constant, is allowed
+/// to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param St Exception flags raised during constant evaluation.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
+                               APFloat::opStatus St) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+  // If the operation does not change exception status flags, it is safe
+  // to fold.
+  if (St == APFloat::opStatus::opOK) {
+    // When FP exceptions are not ignored, intrinsic call will not be
+    // eliminated, because it is considered as having side effect. But we
+    // know that its evaluation does not raise exceptions, so side effect
+    // is absent. To allow removing the call, mark it as not accessing memory.
+    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+      CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    return true;
+  }
+
+  // If evaluation raised FP exception, the result can depend on rounding
+  // mode. If the latter is unknown, folding is not possible.
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    return false;
+
+  // If FP exceptions are ignored, fold the call, even if such exception is
+  // raised.
+  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+    return true;
+
+  // Leave the calculation for runtime so that exception flags be correctly set
+  // in hardware.
+  return false;
+}
+
+/// Returns the rounding mode that should be used for constant evaluation.
+static RoundingMode
+getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    // Even if the rounding mode is unknown, try evaluating the operation.
+    // If it does not raise inexact exception, rounding was not applied,
+    // so the result is exact and does not depend on rounding mode. Whether
+    // other FP exceptions are raised, it does not depend on rounding mode.
+    return RoundingMode::NearestTiesToEven;
+  return *ORM;
+}
+
 static Constant *ConstantFoldScalarCall1(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
@ -2356,16 +2413,45 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
    }
  }

-  if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (!Ty->isFloatingPointTy())
      return nullptr;
    APFloat Op1V = Op1->getValueAPF();

-    if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (Op2->getType() != Op1->getType())
        return nullptr;
      APFloat Op2V = Op2->getValueAPF();

+      if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+        APFloat Res = Op1V;
+        APFloat::opStatus St;
+        switch (IntrinsicID) {
+        default:
+          return nullptr;
+        case Intrinsic::experimental_constrained_fadd:
+          St = Res.add(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fsub:
+          St = Res.subtract(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fmul:
+          St = Res.multiply(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fdiv:
+          St = Res.divide(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_frem:
+          St = Res.mod(Op2V);
+          break;
+        }
+        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
+                               St))
+          return ConstantFP::get(Ty->getContext(), Res);
+        return nullptr;
+      }
+
      switch (IntrinsicID) {
      default:
        break;
@ -2437,6 +2523,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
        break;
      }
    } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+        return nullptr;
      if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
        return ConstantFP::get(
            Ty->getContext(),
@ -2772,6 +2860,25 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();
+
+        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+          APFloat Res = C1;
+          APFloat::opStatus St;
+          switch (IntrinsicID) {
+          default:
+            return nullptr;
+          case Intrinsic::experimental_constrained_fma:
+          case Intrinsic::experimental_constrained_fmuladd:
+            St = Res.fusedMultiplyAdd(C2, C3, RM);
+            break;
+          }
+          if (mayFoldConstrained(
+                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
+            return ConstantFP::get(Ty->getContext(), Res);
+          return nullptr;
+        }
+
        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@ -491,6 +491,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
    if (isMathLibCallNoop(Call, TLI))
      return true;

+  // To express possible interaction with floating point environment constrained
+  // intrinsics are described as if they access memory. So they look like having
+  // side effect but actually do not have it unless they raise floating point
+  // exception. If FP exceptions are ignored, the intrinsic may be deleted.
+  if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+    if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+      return true;
+  }
+
  return false;
 }

--- a/test/Transforms/InstSimplify/constfold-constrained.ll
+++ b/test/Transforms/InstSimplify/constfold-constrained.ll
@ -234,6 +234,186 @@ entry:
  ret double %result
 }

+define float @fadd_01() #0 {
+; CHECK-LABEL: @fadd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret float 3.000000e+01
+;
+entry:
+  %result = call float @llvm.experimental.constrained.fadd.f32(float 1.000000e+01, float 2.000000e+01, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret float %result
+}
+
+; Inexact result does not prevent from folding if exceptions are ignored and
+; rounding mode is known.
+define double @fadd_02() #0 {
+; CHECK-LABEL: @fadd_02(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_03() #0 {
+; CHECK-LABEL: @fadd_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x4000000000000001
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.upward", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; Inexact result prevents from folding if exceptions may be checked.
+define double @fadd_04() #0 {
+; CHECK-LABEL: @fadd_04(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; If result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+; CHECK-LABEL: @fadd_05(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Dynamic rounding mode does not prevent from folding if the result is exact.
+define double @fadd_06() #0 {
+; CHECK-LABEL: @fadd_06(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Inexact results prevents from folding if rounding mode is unknown.
+define double @fadd_07() #0 {
+; CHECK-LABEL: @fadd_07(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; Infinite result does not prevent from folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+; CHECK-LABEL: @fadd_08(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x7FF0000000000000
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_09() #0 {
+; CHECK-LABEL: @fadd_09(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define half @fadd_10() #0 {
+; CHECK-LABEL: @fadd_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret half 0xH4200
+;
+entry:
+  %result = call half @llvm.experimental.constrained.fadd.f16(half 1.0, half 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret half %result
+}
+
+define bfloat @fadd_11() #0 {
+; CHECK-LABEL: @fadd_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret bfloat 0xR4040
+;
+entry:
+  %result = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat 1.0, bfloat 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret bfloat %result
+}
+
+define double @fsub_01() #0 {
+; CHECK-LABEL: @fsub_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fsub.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmul_01() #0 {
+; CHECK-LABEL: @fmul_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmul.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fdiv_01() #0 {
+; CHECK-LABEL: @fdiv_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e-01
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fdiv.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @frem_01() #0 {
+; CHECK-LABEL: @frem_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fma_01() #0 {
+; CHECK-LABEL: @fma_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmuladd_01() #0 {
+; CHECK-LABEL: @fmuladd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+

 attributes #0 = { strictfp }

@ -243,4 +423,14 @@ declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)

--- a/test/Transforms/InstSimplify/fdiv-strictfp.ll
+++ b/test/Transforms/InstSimplify/fdiv-strictfp.ll
@ -12,14 +12,23 @@ define float @fdiv_constant_fold() #0 {

 define float @fdiv_constant_fold_strict() #0 {
 ; CHECK-LABEL: @fdiv_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.500000e+00
 ;
  %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0

  ret float %f
 }

+define float @fdiv_constant_fold_strict2() #0 {
+; CHECK-LABEL: @fdiv_constant_fold_strict2(
+; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    ret float [[F]]
+;
+  %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+
+  ret float %f
+}
+
 define float @frem_constant_fold() #0 {
 ; CHECK-LABEL: @frem_constant_fold(
 ; CHECK-NEXT:    ret float 1.000000e+00
@ -30,10 +39,9 @@ define float @frem_constant_fold() #0 {

 define float @frem_constant_fold_strict() #0 {
 ; CHECK-LABEL: @frem_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.000000e+00
 ;
-  %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %f = call float @llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret float %f
 }