mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
Utilize new SDNode flag functionality to expand current support for fdiv
Summary: This patch originated from D46562 and is a proper subset of it, with some issues addressed.
Reviewers: spatel, hfinkel, wristow, arsenm
Reviewed By: spatel
Subscribers: wdng, nhaehnle
Differential Revision: https://reviews.llvm.org/D47954
llvm-svn: 334862
This commit is contained in:
parent
5f358c7b33
commit
7e62703438
@ -10865,7 +10865,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
|
||||
if (SDValue NewSel = foldBinOpIntoSelect(N))
|
||||
return NewSel;
|
||||
|
||||
if (Options.UnsafeFPMath) {
|
||||
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
|
||||
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
|
||||
if (N1CFP) {
|
||||
// Compute the reciprocal 1.0 / c2.
|
||||
|
@ -218,7 +218,7 @@ define amdgpu_kernel void @div_arcp_2_x_pat_f16(half addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
|
||||
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
|
||||
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
|
||||
|
||||
; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
|
||||
; GFX8_9: buffer_store_short [[MUL]]
|
||||
@ -230,7 +230,7 @@ define amdgpu_kernel void @div_arcp_k_x_pat_f16(half addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
|
||||
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
|
||||
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
|
||||
|
||||
; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
|
||||
; GFX8_9: buffer_store_short [[MUL]]
|
||||
|
@ -8,17 +8,11 @@ define float @fast_recip_sqrt(float %x) {
|
||||
; X64-LABEL: fast_recip_sqrt:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: rsqrtss %xmm0, %xmm1
|
||||
; X64-NEXT: xorps %xmm2, %xmm2
|
||||
; X64-NEXT: cmpeqss %xmm0, %xmm2
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: movss {{.*}}(%rip), %xmm3
|
||||
; X64-NEXT: mulss %xmm0, %xmm3
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: mulss %xmm3, %xmm0
|
||||
; X64-NEXT: andnps %xmm0, %xmm2
|
||||
; X64-NEXT: movss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: divss %xmm2, %xmm0
|
||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: fast_recip_sqrt:
|
||||
@ -89,10 +83,14 @@ define double @not_so_fast_mul_add(double %x) {
|
||||
define float @not_so_fast_recip_sqrt(float %x) {
|
||||
; X64-LABEL: not_so_fast_recip_sqrt:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: sqrtss %xmm0, %xmm1
|
||||
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: divss %xmm1, %xmm0
|
||||
; X64-NEXT: movss %xmm1, {{.*}}(%rip)
|
||||
; X64-NEXT: rsqrtss %xmm0, %xmm1
|
||||
; X64-NEXT: sqrtss %xmm0, %xmm2
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: movss %xmm2, sqrt1(%rip)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: not_so_fast_recip_sqrt:
|
||||
@ -111,3 +109,19 @@ define float @not_so_fast_recip_sqrt(float %x) {
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define float @div_arcp_by_const(half %x) {
|
||||
; X64-LABEL: .LCPI4_0:
|
||||
; X64-NEXT: .long 1036828672
|
||||
; X64-LABEL: div_arcp_by_const:
|
||||
; X64: movzwl %ax, %edi
|
||||
; X64: mulss .LCPI4_0(%rip), %xmm0
|
||||
;
|
||||
; X86-LABEL: .LCPI4_0:
|
||||
; X86-NEXT: .long 1036828672
|
||||
; X86-LABEL: div_arcp_by_const:
|
||||
; X86: movzwl %ax, %eax
|
||||
; X86: fmuls .LCPI4_0
|
||||
%rcp = fdiv arcp half %x, 10.0
|
||||
%z = fpext half %rcp to float
|
||||
ret float %z
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user