mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
guard fsqrt with fmf sub flags
Summary: This change uses FMF sub-flags (here, the approximate-functions flag on the node), in addition to the global UnsafeFPMath option, to guard optimizations. These changes originated from D46483; this patch contains only the fsqrt context.
Reviewers: spatel, hfinkel, arsenm
Reviewed By: spatel
Subscribers: hfinkel, wdng, andrew.w.kaylor, wristow, efriedma, nemanjai
Differential Revision: https://reviews.llvm.org/D47749
llvm-svn: 334113
This commit is contained in:
parent
12ea82882e
commit
33448accd6
@ -10893,17 +10893,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
|
||||
if (!DAG.getTarget().Options.UnsafeFPMath)
|
||||
SDNodeFlags Flags = N->getFlags();
|
||||
if (!DAG.getTarget().Options.UnsafeFPMath &&
|
||||
!Flags.hasApproximateFuncs())
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
if (TLI.isFsqrtCheap(N0, DAG))
|
||||
return SDValue();
|
||||
|
||||
// TODO: FSQRT nodes should have flags that propagate to the created nodes.
|
||||
// For now, create a Flags object for use with reassociation math transforms.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setAllowReassociation(true);
|
||||
// FSQRT nodes have flags that propagate to the created nodes.
|
||||
return buildSqrtEstimate(N0, Flags);
|
||||
}
|
||||
|
||||
|
@ -300,18 +300,34 @@ define float @fmul_fma_fast2(float %x) {
|
||||
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
|
||||
; FMFDEBUG: fsqrt afn {{t[0-9]+}}
|
||||
; FMFDEBUG: fmul afn {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
|
||||
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
|
||||
; GLOBALDEBUG: fmul afn {{t[0-9]+}}
|
||||
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
|
||||
|
||||
define float @sqrt_afn(float %x) {
|
||||
; FMF-LABEL: sqrt_afn:
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xssqrtsp 1, 1
|
||||
; FMF-NEXT: blr
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xxlxor 0, 0, 0
|
||||
; FMF-NEXT: fcmpu 0, 1, 0
|
||||
; FMF-NEXT: beq 0, .LBB10_2
|
||||
; FMF-NEXT: # %bb.1:
|
||||
; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
|
||||
; FMF-NEXT: xsrsqrtesp 3, 1
|
||||
; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l
|
||||
; FMF-NEXT: lfsx 0, 0, 3
|
||||
; FMF-NEXT: xsmulsp 2, 1, 0
|
||||
; FMF-NEXT: xsmulsp 4, 3, 3
|
||||
; FMF-NEXT: xssubsp 2, 2, 1
|
||||
; FMF-NEXT: xsmulsp 2, 2, 4
|
||||
; FMF-NEXT: xssubsp 0, 0, 2
|
||||
; FMF-NEXT: xsmulsp 0, 3, 0
|
||||
; FMF-NEXT: xsmulsp 0, 0, 1
|
||||
; FMF-NEXT: .LBB10_2:
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: sqrt_afn:
|
||||
; GLOBAL: # %bb.0:
|
||||
@ -340,18 +356,34 @@ define float @sqrt_afn(float %x) {
|
||||
; The call is now fully 'fast'. This implies that approximation is allowed.
|
||||
|
||||
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
|
||||
; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
|
||||
|
||||
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
|
||||
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
|
||||
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
||||
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
|
||||
|
||||
define float @sqrt_fast(float %x) {
|
||||
; FMF-LABEL: sqrt_fast:
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xssqrtsp 1, 1
|
||||
; FMF-NEXT: blr
|
||||
; FMF: # %bb.0:
|
||||
; FMF-NEXT: xxlxor 0, 0, 0
|
||||
; FMF-NEXT: fcmpu 0, 1, 0
|
||||
; FMF-NEXT: beq 0, .LBB11_2
|
||||
; FMF-NEXT: # %bb.1:
|
||||
; FMF-NEXT: xsrsqrtesp 2, 1
|
||||
; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
|
||||
; FMF-NEXT: fneg 0, 1
|
||||
; FMF-NEXT: fmr 4, 1
|
||||
; FMF-NEXT: addi 3, 3, .LCPI11_0@toc@l
|
||||
; FMF-NEXT: lfsx 3, 0, 3
|
||||
; FMF-NEXT: xsmaddasp 4, 0, 3
|
||||
; FMF-NEXT: xsmulsp 0, 2, 2
|
||||
; FMF-NEXT: xsmaddasp 3, 4, 0
|
||||
; FMF-NEXT: xsmulsp 0, 2, 3
|
||||
; FMF-NEXT: xsmulsp 0, 0, 1
|
||||
; FMF-NEXT: .LBB11_2:
|
||||
; FMF-NEXT: fmr 1, 0
|
||||
; FMF-NEXT: blr
|
||||
;
|
||||
; GLOBAL-LABEL: sqrt_fast:
|
||||
; GLOBAL: # %bb.0:
|
||||
|
@ -7,9 +7,18 @@ declare float @llvm.sqrt.f32(float %x);
|
||||
define float @fast_recip_sqrt(float %x) {
|
||||
; X64-LABEL: fast_recip_sqrt:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: sqrtss %xmm0, %xmm1
|
||||
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: divss %xmm1, %xmm0
|
||||
; X64-NEXT: rsqrtss %xmm0, %xmm1
|
||||
; X64-NEXT: xorps %xmm2, %xmm2
|
||||
; X64-NEXT: cmpeqss %xmm0, %xmm2
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: movss {{.*}}(%rip), %xmm3
|
||||
; X64-NEXT: mulss %xmm0, %xmm3
|
||||
; X64-NEXT: mulss %xmm1, %xmm0
|
||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: mulss %xmm3, %xmm0
|
||||
; X64-NEXT: andnps %xmm0, %xmm2
|
||||
; X64-NEXT: movss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: divss %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: fast_recip_sqrt:
|
||||
|
@ -7,16 +7,16 @@ define float @foo(float %f) #0 {
|
||||
; CHECK: body:
|
||||
; CHECK: %0:fr32 = COPY $xmm0
|
||||
; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0
|
||||
; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1
|
||||
; CHECK: %3:fr32 = VMULSSrr %0, %1
|
||||
; CHECK: %4:fr32 = VMOVSSrm
|
||||
; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4
|
||||
; CHECK: %6:fr32 = VMOVSSrm
|
||||
; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6
|
||||
; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5
|
||||
; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8
|
||||
; CHECK: %7:fr32 = VMULSSrr %1, %6
|
||||
; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5
|
||||
; CHECK: %9:fr32 = VMULSSrr %0, %8
|
||||
; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4
|
||||
; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6
|
||||
; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10
|
||||
; CHECK: %11:fr32 = VMULSSrr %9, %6
|
||||
; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10
|
||||
; CHECK: %14:fr32 = FsFLD0SS
|
||||
; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0
|
||||
; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13
|
||||
|
Loading…
Reference in New Issue
Block a user