[DAGCombine]: Fold X/Sqrt(X) to Sqrt(X)

With the "nsz" and "reassoc" fast-math flags (FMF), fold X/Sqrt(X) to Sqrt(X). This is done after targets have had the chance to produce a reciprocal sqrt estimate sequence, because that expansion is probably more efficient than an expansion of a non-reciprocal sqrt. That is also why we deferred doing this transform in IR (D85709). Differential Revision: https://reviews.llvm.org/D86403
2025-01-31 12:41:49 +01:00 · 2020-08-24 18:12:32 -04:00 · 2020-08-24 18:12:32 -04:00 · 255be4506d
commit 255be4506d
parent 7a4b11cdee
3 changed files with 20 additions and 23 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -13356,6 +13356,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
        return RV;
  }

+  // Fold X/Sqrt(X) -> Sqrt(X)
+  if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+      (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
+    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
+      return N1;
+
  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
--- a/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ b/test/CodeGen/AArch64/sqrt-fastmath.ll
@ -448,8 +448,7 @@ define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
 define double @sqrt_fdiv_common_operand(double %x) nounwind {
 ; FAULT-LABEL: sqrt_fdiv_common_operand:
 ; FAULT:       // %bb.0:
-; FAULT-NEXT:    fsqrt d1, d0
-; FAULT-NEXT:    fdiv d0, d0, d1
+; FAULT-NEXT:    fsqrt d0, d0
 ; FAULT-NEXT:    ret
 ;
 ; CHECK-LABEL: sqrt_fdiv_common_operand:
@ -474,8 +473,7 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind {
 define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
 ; FAULT-LABEL: sqrt_fdiv_common_operand_vec:
 ; FAULT:       // %bb.0:
-; FAULT-NEXT:    fsqrt v1.2d, v0.2d
-; FAULT-NEXT:    fdiv v0.2d, v0.2d, v1.2d
+; FAULT-NEXT:    fsqrt v0.2d, v0.2d
 ; FAULT-NEXT:    ret
 ;
 ; CHECK-LABEL: sqrt_fdiv_common_operand_vec:
@ -493,16 +491,15 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    ret
  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %r = fdiv nsz arcp reassoc <2 x double> %x, %sqrt
+  %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
  ret <2 x double> %r
 }

 define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind {
 ; FAULT-LABEL: sqrt_fdiv_common_operand_extra_use:
 ; FAULT:       // %bb.0:
-; FAULT-NEXT:    fsqrt d1, d0
-; FAULT-NEXT:    fdiv d0, d0, d1
-; FAULT-NEXT:    str d1, [x0]
+; FAULT-NEXT:    fsqrt d0, d0
+; FAULT-NEXT:    str d0, [x0]
 ; FAULT-NEXT:    ret
 ;
 ; CHECK-LABEL: sqrt_fdiv_common_operand_extra_use:
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@ -903,14 +903,12 @@ define <4 x float> @div_sqrt_v4f32(<4 x float> %x, <4 x float> %y) {
 define double @sqrt_fdiv_common_operand(double %x) nounwind {
 ; SSE-LABEL: sqrt_fdiv_common_operand:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    sqrtsd %xmm0, %xmm1
-; SSE-NEXT:    divsd %xmm1, %xmm0
+; SSE-NEXT:    sqrtsd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sqrt_fdiv_common_operand:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
-; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  %r = fdiv fast double %x, %sqrt
@ -920,33 +918,29 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind {
 define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
 ; SSE-LABEL: sqrt_fdiv_common_operand_vec:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    sqrtpd %xmm0, %xmm1
-; SSE-NEXT:    divpd %xmm1, %xmm0
+; SSE-NEXT:    sqrtpd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sqrt_fdiv_common_operand_vec:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vsqrtpd %xmm0, %xmm1
-; AVX-NEXT:    vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vsqrtpd %xmm0, %xmm0
 ; AVX-NEXT:    retq
  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
-  %r = fdiv nsz arcp reassoc <2 x double> %x, %sqrt
+  %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
  ret <2 x double> %r
 }

 define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind {
 ; SSE-LABEL: sqrt_fdiv_common_operand_extra_use:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    sqrtsd %xmm0, %xmm1
-; SSE-NEXT:    movsd %xmm1, (%rdi)
-; SSE-NEXT:    divsd %xmm1, %xmm0
+; SSE-NEXT:    sqrtsd %xmm0, %xmm0
+; SSE-NEXT:    movsd %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sqrt_fdiv_common_operand_extra_use:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
-; AVX-NEXT:    vmovsd %xmm1, (%rdi)
-; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovsd %xmm0, (%rdi)
 ; AVX-NEXT:    retq
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  store double %sqrt, double* %p