1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[DAGCombiner] Require ninf for division estimation

The current implementation of division estimation is incorrect for some
cases such as 1.0/0.0 (the result is NaN instead of the expected inf).

This change also exposes a potential infinite loop: combineRepeatedFPDivisors
uses isConstOrConstSplatFP to check whether the divisor is a constant, but
that check does not work after legalization on some platforms. This patch
therefore restricts combineRepeatedFPDivisors to run only before LegalDAG.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D80542
This commit is contained in:
Qiu Chaofan 2020-06-14 22:58:22 +08:00
parent 0a4fe71f16
commit e59e06d663
9 changed files with 56 additions and 23 deletions

View File

@ -13032,7 +13032,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (!UnsafeMath && !Flags.hasAllowReciprocal())
if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
@ -13186,8 +13186,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
return RV;
if (Options.NoInfsFPMath || Flags.hasNoInfs())
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)

View File

@ -32,7 +32,7 @@
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
%fdiv = fdiv float %a, %b
%fdiv = fdiv ninf float %a, %b
store float %fdiv, float addrspace(1)* %out
ret void
}
@ -152,7 +152,7 @@ entry:
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
%fdiv = fdiv arcp float %a, %b
%fdiv = fdiv arcp ninf float %a, %b
store float %fdiv, float addrspace(1)* %out
ret void
}
@ -210,7 +210,7 @@ entry:
; GCN: v_rcp_f32
define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
%fdiv = fdiv arcp <2 x float> %a, %b
%fdiv = fdiv arcp ninf <2 x float> %a, %b
store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
ret void
}
@ -279,7 +279,7 @@ define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out,
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
%result = fdiv arcp <4 x float> %a, %b
%result = fdiv arcp ninf <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}

View File

@ -23,7 +23,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
entry:
%splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
%splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
%div = fdiv reassoc nsz arcp <4 x double> %a1, %splat.splat
%div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
%sub = fsub reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
ret <4 x double> %sub
}

View File

@ -1,6 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
; The fdiv below has reassoc/arcp/nsz but no ninf flag; the expected output is
; a real divide (xsdivsp) rather than a reciprocal-estimate sequence.
define dso_local float @foo_nosw(float %0, float %1) local_unnamed_addr {
; CHECK-LABEL: foo_nosw:
; CHECK: # %bb.0:
; CHECK-NEXT: xsdivsp 1, 1, 2
; CHECK-NEXT: blr
%3 = fdiv reassoc arcp nsz float %0, %1
ret float %3
}
define dso_local float @foo(float %0, float %1) local_unnamed_addr {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
@ -10,6 +19,6 @@ define dso_local float @foo(float %0, float %1) local_unnamed_addr {
; CHECK-NEXT: xsmaddasp 0, 3, 1
; CHECK-NEXT: fmr 1, 0
; CHECK-NEXT: blr
%3 = fdiv reassoc arcp nsz float %0, %1
%3 = fdiv reassoc arcp nsz ninf float %0, %1
ret float %3
}

View File

@ -236,7 +236,7 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-NEXT: qvfmadd 1, 0, 1, 3
; CHECK-NEXT: blr
entry:
%r = fdiv arcp reassoc nsz <4 x double> %a, %b
%r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
ret <4 x double> %r
}
@ -272,7 +272,7 @@ define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-NEXT: qvfmadds 1, 0, 1, 3
; CHECK-NEXT: blr
entry:
%r = fdiv arcp reassoc <4 x float> %a, %b
%r = fdiv arcp reassoc ninf <4 x float> %a, %b
ret <4 x float> %r
}

View File

@ -431,7 +431,7 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
; CHECK-P9-NEXT: blr
%x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
%y = fmul reassoc nsz float %x, %b
%z = fdiv reassoc arcp nsz float %c, %y
%z = fdiv reassoc arcp nsz ninf float %c, %y
ret float %z
}
@ -602,7 +602,7 @@ define double @foo2_fmf(double %a, double %b) nounwind {
; CHECK-P9-NEXT: xsmaddadp 0, 3, 1
; CHECK-P9-NEXT: fmr 1, 0
; CHECK-P9-NEXT: blr
%r = fdiv reassoc arcp nsz double %a, %b
%r = fdiv reassoc arcp nsz ninf double %a, %b
ret double %r
}
@ -651,7 +651,7 @@ define float @goo2_fmf(float %a, float %b) nounwind {
; CHECK-P9-NEXT: xsmaddasp 0, 3, 1
; CHECK-P9-NEXT: fmr 1, 0
; CHECK-P9-NEXT: blr
%r = fdiv reassoc arcp nsz float %a, %b
%r = fdiv reassoc arcp nsz ninf float %a, %b
ret float %r
}
@ -705,7 +705,7 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-P9-NEXT: xvmaddasp 0, 1, 34
; CHECK-P9-NEXT: xxlor 34, 0, 0
; CHECK-P9-NEXT: blr
%r = fdiv reassoc arcp nsz <4 x float> %a, %b
%r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b
ret <4 x float> %r
}

View File

@ -1,15 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s
; Check that this does not cause an infinite loop when division estimation is
; disabled: the fdiv below has no ninf flag, and the expected output keeps a
; real vector divide (xvdivsp).
define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
; CHECK-LABEL: repeated_fp_divisor_noest:
; CHECK: # %bb.0:
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
; CHECK-NEXT: lvx 3, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: xvdivsp 0, 35, 0
; CHECK-NEXT: lvx 3, 0, 3
; CHECK-NEXT: xvmulsp 1, 34, 35
; CHECK-NEXT: xvmulsp 34, 1, 0
; CHECK-NEXT: blr
%ins = insertelement <4 x float> undef, float %a, i32 0
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
%t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
%mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
ret <4 x float> %mul
}
define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
; CHECK-LABEL: repeated_fp_divisor:
; CHECK: # %bb.0:
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
; CHECK-NEXT: lvx 3, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
; CHECK-NEXT: lvx 4, 0, 3
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: xvresp 1, 0
@ -21,7 +44,7 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
%ins = insertelement <4 x float> undef, float %a, i32 0
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
%t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
%mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
%mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat
ret <4 x float> %mul
}

View File

@ -10,7 +10,7 @@ define float @emit_xsresp() {
entry:
%0 = load float, float* @a, align 4
%1 = load float, float* @b, align 4
%div = fdiv arcp float %0, %1
%div = fdiv arcp ninf float %0, %1
ret float %div
; CHECK-LABEL: @emit_xsresp
; CHECK: xsresp {{[0-9]+}}
@ -38,7 +38,7 @@ define double @emit_xsredp() {
entry:
%0 = load double, double* @c, align 8
%1 = load double, double* @d, align 8
%div = fdiv arcp double %0, %1
%div = fdiv arcp ninf double %0, %1
ret double %div
; CHECK-LABEL: @emit_xsredp
; CHECK: xsredp {{[0-9]+}}

View File

@ -120,7 +120,7 @@ define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
; AVX-NEXT: retq
%vy = insertelement <4 x float> undef, float %y, i32 0
%splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
%r = fdiv arcp reassoc <4 x float> %x, %splaty
%r = fdiv arcp reassoc ninf <4 x float> %x, %splaty
ret <4 x float> %r
}