mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[DAGCombiner] Require ninf for division estimation
The current implementation of division estimation is incorrect for some cases, such as 1.0/0.0 (the result is NaN rather than the expected inf), so the estimate is now only used when ninf is set. This change also exposes a potential infinite loop: we use isConstOrConstSplatFP in combineRepeatedFPDivisors to check whether the divisor is a constant, but that check does not work after legalization on some platforms. This patch therefore restricts combineRepeatedFPDivisors to run only before LegalDAG. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D80542
This commit is contained in:
parent
0a4fe71f16
commit
e59e06d663
@ -13032,7 +13032,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
|
||||
// that only minsize should restrict this.
|
||||
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
|
||||
const SDNodeFlags Flags = N->getFlags();
|
||||
if (!UnsafeMath && !Flags.hasAllowReciprocal())
|
||||
if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
|
||||
return SDValue();
|
||||
|
||||
// Skip if current node is a reciprocal/fneg-reciprocal.
|
||||
@ -13186,8 +13186,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
|
||||
}
|
||||
|
||||
// Fold into a reciprocal estimate and multiply instead of a real divide.
|
||||
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
|
||||
return RV;
|
||||
if (Options.NoInfsFPMath || Flags.hasNoInfs())
|
||||
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
|
||||
return RV;
|
||||
}
|
||||
|
||||
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
|
||||
|
@ -32,7 +32,7 @@
|
||||
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
|
||||
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
entry:
|
||||
%fdiv = fdiv float %a, %b
|
||||
%fdiv = fdiv ninf float %a, %b
|
||||
store float %fdiv, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -152,7 +152,7 @@ entry:
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
entry:
|
||||
%fdiv = fdiv arcp float %a, %b
|
||||
%fdiv = fdiv arcp ninf float %a, %b
|
||||
store float %fdiv, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -210,7 +210,7 @@ entry:
|
||||
; GCN: v_rcp_f32
|
||||
define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
|
||||
entry:
|
||||
%fdiv = fdiv arcp <2 x float> %a, %b
|
||||
%fdiv = fdiv arcp ninf <2 x float> %a, %b
|
||||
store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -279,7 +279,7 @@ define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out,
|
||||
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
|
||||
%a = load <4 x float>, <4 x float> addrspace(1) * %in
|
||||
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
|
||||
%result = fdiv arcp <4 x float> %a, %b
|
||||
%result = fdiv arcp ninf <4 x float> %a, %b
|
||||
store <4 x float> %result, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -23,7 +23,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
|
||||
entry:
|
||||
%splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
|
||||
%splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
%div = fdiv reassoc nsz arcp <4 x double> %a1, %splat.splat
|
||||
%div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
|
||||
%sub = fsub reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
|
||||
ret <4 x double> %sub
|
||||
}
|
||||
|
@ -1,6 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
|
||||
|
||||
define dso_local float @foo_nosw(float %0, float %1) local_unnamed_addr {
|
||||
; CHECK-LABEL: foo_nosw:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xsdivsp 1, 1, 2
|
||||
; CHECK-NEXT: blr
|
||||
%3 = fdiv reassoc arcp nsz float %0, %1
|
||||
ret float %3
|
||||
}
|
||||
|
||||
define dso_local float @foo(float %0, float %1) local_unnamed_addr {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0:
|
||||
@ -10,6 +19,6 @@ define dso_local float @foo(float %0, float %1) local_unnamed_addr {
|
||||
; CHECK-NEXT: xsmaddasp 0, 3, 1
|
||||
; CHECK-NEXT: fmr 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
%3 = fdiv reassoc arcp nsz float %0, %1
|
||||
%3 = fdiv reassoc arcp nsz ninf float %0, %1
|
||||
ret float %3
|
||||
}
|
||||
|
@ -236,7 +236,7 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-NEXT: qvfmadd 1, 0, 1, 3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = fdiv arcp reassoc nsz <4 x double> %a, %b
|
||||
%r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
@ -272,7 +272,7 @@ define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-NEXT: qvfmadds 1, 0, 1, 3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = fdiv arcp reassoc <4 x float> %a, %b
|
||||
%r = fdiv arcp reassoc ninf <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
@ -431,7 +431,7 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
|
||||
; CHECK-P9-NEXT: blr
|
||||
%x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
|
||||
%y = fmul reassoc nsz float %x, %b
|
||||
%z = fdiv reassoc arcp nsz float %c, %y
|
||||
%z = fdiv reassoc arcp nsz ninf float %c, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
@ -602,7 +602,7 @@ define double @foo2_fmf(double %a, double %b) nounwind {
|
||||
; CHECK-P9-NEXT: xsmaddadp 0, 3, 1
|
||||
; CHECK-P9-NEXT: fmr 1, 0
|
||||
; CHECK-P9-NEXT: blr
|
||||
%r = fdiv reassoc arcp nsz double %a, %b
|
||||
%r = fdiv reassoc arcp nsz ninf double %a, %b
|
||||
ret double %r
|
||||
}
|
||||
|
||||
@ -651,7 +651,7 @@ define float @goo2_fmf(float %a, float %b) nounwind {
|
||||
; CHECK-P9-NEXT: xsmaddasp 0, 3, 1
|
||||
; CHECK-P9-NEXT: fmr 1, 0
|
||||
; CHECK-P9-NEXT: blr
|
||||
%r = fdiv reassoc arcp nsz float %a, %b
|
||||
%r = fdiv reassoc arcp nsz ninf float %a, %b
|
||||
ret float %r
|
||||
}
|
||||
|
||||
@ -705,7 +705,7 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-P9-NEXT: xvmaddasp 0, 1, 34
|
||||
; CHECK-P9-NEXT: xxlor 34, 0, 0
|
||||
; CHECK-P9-NEXT: blr
|
||||
%r = fdiv reassoc arcp nsz <4 x float> %a, %b
|
||||
%r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
@ -1,15 +1,38 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s
|
||||
|
||||
; Check that this does not cause an infinite loop when estimation is disabled
|
||||
define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: repeated_fp_divisor_noest:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
; CHECK-NEXT: lvx 3, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-NEXT: xxspltw 0, 0, 0
|
||||
; CHECK-NEXT: xvdivsp 0, 35, 0
|
||||
; CHECK-NEXT: lvx 3, 0, 3
|
||||
; CHECK-NEXT: xvmulsp 1, 34, 35
|
||||
; CHECK-NEXT: xvmulsp 34, 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
%ins = insertelement <4 x float> undef, float %a, i32 0
|
||||
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
|
||||
%mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
|
||||
ret <4 x float> %mul
|
||||
}
|
||||
|
||||
define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: repeated_fp_divisor:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; CHECK-NEXT: lvx 3, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
|
||||
; CHECK-NEXT: lvx 4, 0, 3
|
||||
; CHECK-NEXT: xxspltw 0, 0, 0
|
||||
; CHECK-NEXT: xvresp 1, 0
|
||||
@ -21,7 +44,7 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
|
||||
%ins = insertelement <4 x float> undef, float %a, i32 0
|
||||
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
|
||||
%mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
|
||||
%mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat
|
||||
ret <4 x float> %mul
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@ define float @emit_xsresp() {
|
||||
entry:
|
||||
%0 = load float, float* @a, align 4
|
||||
%1 = load float, float* @b, align 4
|
||||
%div = fdiv arcp float %0, %1
|
||||
%div = fdiv arcp ninf float %0, %1
|
||||
ret float %div
|
||||
; CHECK-LABEL: @emit_xsresp
|
||||
; CHECK: xsresp {{[0-9]+}}
|
||||
@ -38,7 +38,7 @@ define double @emit_xsredp() {
|
||||
entry:
|
||||
%0 = load double, double* @c, align 8
|
||||
%1 = load double, double* @d, align 8
|
||||
%div = fdiv arcp double %0, %1
|
||||
%div = fdiv arcp ninf double %0, %1
|
||||
ret double %div
|
||||
; CHECK-LABEL: @emit_xsredp
|
||||
; CHECK: xsredp {{[0-9]+}}
|
||||
|
@ -120,7 +120,7 @@ define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
|
||||
; AVX-NEXT: retq
|
||||
%vy = insertelement <4 x float> undef, float %y, i32 0
|
||||
%splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%r = fdiv arcp reassoc <4 x float> %x, %splaty
|
||||
%r = fdiv arcp reassoc ninf <4 x float> %x, %splaty
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user