[DAGCombiner] Require ninf for division estimation

The current implementation of division estimation is incorrect for some cases, such as 1.0/0.0 (the result is NaN instead of the expected inf), so the estimate is now only used when the ninf flag (or the NoInfsFPMath option) is present. This change also exposes a potential infinite loop: combineRepeatedFPDivisors uses isConstOrConstSplatFP to check whether the divisor is a constant, but that check does not work after legalization on some platforms. This patch therefore restricts combineRepeatedFPDivisors to run only before LegalDAG. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D80542
2024-11-23 03:02:36 +01:00 · 2020-06-14 22:58:22 +08:00 · 2020-06-14 22:58:22 +08:00 · e59e06d663
commit e59e06d663
parent 0a4fe71f16
9 changed files with 56 additions and 23 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -13032,7 +13032,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
-  if (!UnsafeMath && !Flags.hasAllowReciprocal())
+  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
@ -13186,8 +13186,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
-    if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
-      return RV;
+    if (Options.NoInfsFPMath || Flags.hasNoInfs())
+      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+        return RV;
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
--- a/test/CodeGen/AMDGPU/fdiv.ll
+++ b/test/CodeGen/AMDGPU/fdiv.ll
@ -32,7 +32,7 @@
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
 define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
-  %fdiv = fdiv float %a, %b
+  %fdiv = fdiv ninf float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
 }
@ -152,7 +152,7 @@ entry:
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
-  %fdiv = fdiv arcp float %a, %b
+  %fdiv = fdiv arcp ninf float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
 }
@ -210,7 +210,7 @@ entry:
 ; GCN: v_rcp_f32
 define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
-  %fdiv = fdiv arcp <2 x float> %a, %b
+  %fdiv = fdiv arcp ninf <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
 }
@ -279,7 +279,7 @@ define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out,
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
-  %result = fdiv arcp <4 x float> %a, %b
+  %result = fdiv arcp ninf <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
 }
--- a/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/test/CodeGen/PowerPC/combine-fneg.ll
@ -23,7 +23,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 entry:
  %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
  %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
-  %div = fdiv reassoc nsz arcp <4 x double> %a1, %splat.splat
+  %div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
  %sub = fsub reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
  ret <4 x double> %sub
 }
--- a/test/CodeGen/PowerPC/fdiv.ll
+++ b/test/CodeGen/PowerPC/fdiv.ll
@ -1,6 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s

+define dso_local float @foo_nosw(float %0, float %1) local_unnamed_addr {
+; CHECK-LABEL: foo_nosw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsdivsp 1, 1, 2
+; CHECK-NEXT:    blr
+  %3 = fdiv reassoc arcp nsz float %0, %1
+  ret float %3
+}
+
 define dso_local float @foo(float %0, float %1) local_unnamed_addr {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
@ -10,6 +19,6 @@ define dso_local float @foo(float %0, float %1) local_unnamed_addr {
 ; CHECK-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-NEXT:    fmr 1, 0
 ; CHECK-NEXT:    blr
-  %3 = fdiv reassoc arcp nsz float %0, %1
+  %3 = fdiv reassoc arcp nsz ninf float %0, %1
  ret float %3
 }
--- a/test/CodeGen/PowerPC/qpx-recipest.ll
+++ b/test/CodeGen/PowerPC/qpx-recipest.ll
@ -236,7 +236,7 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-NEXT:    qvfmadd 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
-  %r = fdiv arcp reassoc nsz <4 x double> %a, %b
+  %r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
  ret <4 x double> %r
 }

@ -272,7 +272,7 @@ define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-NEXT:    qvfmadds 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
-  %r = fdiv arcp reassoc <4 x float> %a, %b
+  %r = fdiv arcp reassoc ninf <4 x float> %a, %b
  ret <4 x float> %r
 }

--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@ -431,7 +431,7 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
 ; CHECK-P9-NEXT:    blr
  %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
  %y = fmul reassoc nsz float %x, %b
-  %z = fdiv reassoc arcp nsz float %c, %y
+  %z = fdiv reassoc arcp nsz ninf float %c, %y
  ret float %z
 }

@ -602,7 +602,7 @@ define double @foo2_fmf(double %a, double %b) nounwind {
 ; CHECK-P9-NEXT:    xsmaddadp 0, 3, 1
 ; CHECK-P9-NEXT:    fmr 1, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz double %a, %b
+  %r = fdiv reassoc arcp nsz ninf double %a, %b
  ret double %r
 }

@ -651,7 +651,7 @@ define float @goo2_fmf(float %a, float %b) nounwind {
 ; CHECK-P9-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-P9-NEXT:    fmr 1, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz float %a, %b
+  %r = fdiv reassoc arcp nsz ninf float %a, %b
  ret float %r
 }

@ -705,7 +705,7 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-P9-NEXT:    xvmaddasp 0, 1, 34
 ; CHECK-P9-NEXT:    xxlor 34, 0, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz <4 x float> %a, %b
+  %r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b
  ret <4 x float> %r
 }

--- a/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@ -1,15 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s

+; Check that this does not cause an infinite loop when estimation is disabled.
+define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
+; CHECK-LABEL: repeated_fp_divisor_noest:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdpspn 0, 1
+; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT:    xxspltw 0, 0, 0
+; CHECK-NEXT:    xvdivsp 0, 35, 0
+; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xvmulsp 1, 34, 35
+; CHECK-NEXT:    xvmulsp 34, 1, 0
+; CHECK-NEXT:    blr
+  %ins = insertelement <4 x float> undef, float %a, i32 0
+  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
+  %t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
+  %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
+  ret <4 x float> %mul
+}
+
 define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-LABEL: repeated_fp_divisor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xscvdpspn 0, 1
-; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
 ; CHECK-NEXT:    lvx 3, 0, 3
-; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_1@toc@l
 ; CHECK-NEXT:    lvx 4, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    xvresp 1, 0
@ -21,7 +44,7 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
  %ins = insertelement <4 x float> undef, float %a, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
-  %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
+  %mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat
  ret <4 x float> %mul
 }

--- a/test/CodeGen/PowerPC/vsx-recip-est.ll
+++ b/test/CodeGen/PowerPC/vsx-recip-est.ll
@ -10,7 +10,7 @@ define float @emit_xsresp() {
 entry:
  %0 = load float, float* @a, align 4
  %1 = load float, float* @b, align 4
-  %div = fdiv arcp float %0, %1
+  %div = fdiv arcp ninf float %0, %1
  ret float %div
 ; CHECK-LABEL: @emit_xsresp
 ; CHECK: xsresp {{[0-9]+}}
@ -38,7 +38,7 @@ define double @emit_xsredp() {
 entry:
  %0 = load double, double* @c, align 8
  %1 = load double, double* @d, align 8
-  %div = fdiv arcp double %0, %1
+  %div = fdiv arcp ninf double %0, %1
  ret double %div
 ; CHECK-LABEL: @emit_xsredp
 ; CHECK: xsredp {{[0-9]+}}
--- a/test/CodeGen/X86/fdiv-combine-vec.ll
+++ b/test/CodeGen/X86/fdiv-combine-vec.ll
@ -120,7 +120,7 @@ define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
 ; AVX-NEXT:    retq
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
-  %r = fdiv arcp reassoc <4 x float> %x, %splaty
+  %r = fdiv arcp reassoc ninf <4 x float> %x, %splaty
  ret <4 x float> %r
 }