diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 80ebbdd8f77..c88861fa855 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10865,7 +10865,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
       // Compute the reciprocal 1.0 / c2.
diff --git a/test/CodeGen/AMDGPU/fdiv.f16.ll b/test/CodeGen/AMDGPU/fdiv.f16.ll
index 275a12d9d76..8bcf0660a2c 100644
--- a/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -218,7 +218,7 @@ define amdgpu_kernel void @div_arcp_2_x_pat_f16(half addrspace(1)* %out) #0 {
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
 
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
@@ -230,7 +230,7 @@ define amdgpu_kernel void @div_arcp_k_x_pat_f16(half addrspace(1)* %out) #0 {
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
 
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
diff --git a/test/CodeGen/X86/fmf-flags.ll b/test/CodeGen/X86/fmf-flags.ll
index 81a34b510e0..d958378d4f7 100644
--- a/test/CodeGen/X86/fmf-flags.ll
+++ b/test/CodeGen/X86/fmf-flags.ll
@@ -8,17 +8,11 @@ define float @fast_recip_sqrt(float %x) {
 ; X64-LABEL: fast_recip_sqrt:
 ; X64:       # %bb.0:
 ; X64-NEXT:    rsqrtss %xmm0, %xmm1
-; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    cmpeqss %xmm0, %xmm2
 ; X64-NEXT:    mulss %xmm1, %xmm0
-; X64-NEXT:    movss {{.*}}(%rip), %xmm3
-; X64-NEXT:    mulss %xmm0, %xmm3
 ; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    addss {{.*}}(%rip), %xmm0
-; X64-NEXT:    mulss %xmm3, %xmm0
-; X64-NEXT:    andnps %xmm0, %xmm2
-; X64-NEXT:    movss {{.*}}(%rip), %xmm0
-; X64-NEXT:    divss %xmm2, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_recip_sqrt:
@@ -89,10 +83,14 @@ define double @not_so_fast_mul_add(double %x) {
 define float @not_so_fast_recip_sqrt(float %x) {
 ; X64-LABEL: not_so_fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    movss %xmm1, {{.*}}(%rip)
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    sqrtss %xmm0, %xmm2
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    movss %xmm2, sqrt1(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: not_so_fast_recip_sqrt:
@@ -111,3 +109,19 @@ define float @not_so_fast_recip_sqrt(float %x) {
   ret float %z
 }
 
+define float @div_arcp_by_const(half %x) {
+; X64-LABEL: .LCPI4_0:
+; X64-NEXT: .long 1036828672
+; X64-LABEL: div_arcp_by_const:
+; X64: movzwl %ax, %edi
+; X64: mulss .LCPI4_0(%rip), %xmm0
+;
+; X86-LABEL: .LCPI4_0:
+; X86-NEXT: .long 1036828672
+; X86-LABEL: div_arcp_by_const:
+; X86: movzwl %ax, %eax
+; X86: fmuls .LCPI4_0
+  %rcp = fdiv arcp half %x, 10.0
+  %z = fpext half %rcp to float
+  ret float %z
+}