SimplifyLibCalls: Replace fabs libcalls with intrinsics

Add missing fabs(fpext) optimzation that worked with the call, and also fixes it creating a second fpext when there were multiple uses. llvm-svn: 292172
2025-01-31 20:51:52 +01:00 · 2017-01-17 00:10:40 +00:00 · 2017-01-17 00:10:40 +00:00 · ccfb3dd68e
commit ccfb3dd68e
parent 9226f41ce8
11 changed files with 158 additions and 67 deletions
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@ -826,6 +826,18 @@ inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
  return CastClass_match<OpTy, Instruction::SIToFP>(Op);
 }

+/// \brief Matches FPTrunc
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
+}
+
+/// \brief Matches FPExt
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPExt>(Op);
+}
+
 //===----------------------------------------------------------------------===//
 // Matchers for unary operators
 //
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@ -1631,6 +1631,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      return SelectInst::Create(Cond, Call0, Call1);
    }

+    Value *ExtSrc;
+    if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
+        II->getArgOperand(0)->hasOneUse()) {
+      // fabs (fpext x) -> fpext (fabs x)
+      Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs,
+                                           { ExtSrc->getType() });
+      CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
+      NewFabs->copyFastMathFlags(II);
+      NewFabs->takeName(II);
+      return new FPExtInst(NewFabs, II->getType());
+    }
+
    break;
  }
  case Intrinsic::cos:
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@ -1392,21 +1392,24 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
  if (II) {
    switch (II->getIntrinsicID()) {
-      default: break;
-      case Intrinsic::fabs: {
-        // (fptrunc (fabs x)) -> (fabs (fptrunc x))
-        Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
-                                                   CI.getType());
-        Type *IntrinsicType[] = { CI.getType() };
-        Function *Overload = Intrinsic::getDeclaration(
-            CI.getModule(), II->getIntrinsicID(), IntrinsicType);
+    default: break;
+    case Intrinsic::fabs: {
+      // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+      Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+                                                 CI.getType());
+      Type *IntrinsicType[] = { CI.getType() };
+      Function *Overload = Intrinsic::getDeclaration(
+        CI.getModule(), II->getIntrinsicID(), IntrinsicType);

-        SmallVector<OperandBundleDef, 1> OpBundles;
-        II->getOperandBundlesAsDefs(OpBundles);
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      II->getOperandBundlesAsDefs(OpBundles);

-        Value *Args[] = { InnerTrunc };
-        return CallInst::Create(Overload, Args, OpBundles, II->getName());
-      }
+      Value *Args[] = { InnerTrunc };
+      CallInst *NewCI =  CallInst::Create(Overload, Args,
+                                          OpBundles, II->getName());
+      NewCI->copyFastMathFlags(II);
+      return NewCI;
+    }
    }
  }

--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@ -1210,11 +1210,15 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {

 Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
-  if (Name == "fabs" && hasFloatVersion(Name))
-    return optimizeUnaryDoubleFP(CI, B, false);
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(CI->getFastMathFlags());

-  return nullptr;
+  // fabs/fabsf -> llvm.fabs.*
+  Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs,
+                                       CI->getType());
+  Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) });
+  NewCall->takeName(CI);
+  return NewCall;
 }

 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
@ -2029,8 +2033,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
      return optimizePow(CI, Builder);
    case Intrinsic::exp2:
      return optimizeExp2(CI, Builder);
-    case Intrinsic::fabs:
-      return optimizeFabs(CI, Builder);
    case Intrinsic::log:
      return optimizeLog(CI, Builder);
    case Intrinsic::sqrt:
--- a/test/Transforms/InstCombine/double-float-shrink-2.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-2.ll
@ -10,7 +10,8 @@
 ; DO-SIMPLIFY: call float @roundf(
 ; DO-SIMPLIFY: call float @nearbyintf(
 ; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
+; DO-SIMPLIFY: call float @llvm.fabs.f32(
+; DO-SIMPLIFY: call fast float @llvm.fabs.f32(

 ; C89-SIMPLIFY: call float @floorf(
 ; C89-SIMPLIFY: call float @ceilf(
@ -22,7 +23,10 @@
 ; DONT-SIMPLIFY: call double @round(
 ; DONT-SIMPLIFY: call double @nearbyint(
 ; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
+
+; This is replaced with the intrinsic, which does the right thing on
+; all platforms.
+; DONT-SIMPLIFY: call float @llvm.fabs.f32(

 declare double @floor(double)
 declare double @ceil(double)
@ -30,6 +34,7 @@ declare double @round(double)
 declare double @nearbyint(double)
 declare double @trunc(double)
 declare double @fabs(double)
+declare double @llvm.fabs.f64(double)

 define float @test_floor(float %C) {
  %D = fpext float %C to double
@ -78,3 +83,12 @@ define float @test_fabs(float %C) {
  %F = fptrunc double %E to float
  ret float %F
 }
+
+; Make sure fast math flags are preserved
+define float @test_fabs_fast(float %C) {
+  %D = fpext float %C to double
+  ; --> fabsf
+  %E = call fast double @fabs(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
--- a/test/Transforms/InstCombine/fabs-libcall.ll
+++ b/test/Transforms/InstCombine/fabs-libcall.ll
@ -0,0 +1,21 @@
+; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s
+
+declare x86_fp80 @fabsl(x86_fp80)
+
+; CHECK-LABEL: @replace_fabs_call_f80(
+; CHECK-NEXT: %fabsl = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 %fabsl
+define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) {
+  %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x)
+  ret x86_fp80 %fabsl
+
+}
+
+; CHECK-LABEL: @fmf_replace_fabs_call_f80(
+; CHECK-NEXT: %fabsl = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 %fabsl
+define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) {
+  %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x)
+  ret x86_fp80 %fabsl
+}
+
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@ -1,6 +1,10 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s

-; Make sure all library calls are eliminated when the input is known positive.
+; Make sure libcalls are replaced with intrinsic calls.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)

 declare float @fabsf(float)
 declare double @fabs(double)
@ -8,46 +12,46 @@ declare fp128 @fabsl(fp128)
 declare float @llvm.fma.f32(float, float, float)
 declare float @llvm.fmuladd.f32(float, float, float)

-define float @square_fabs_call_f32(float %x) {
-  %mul = fmul float %x, %x
-  %fabsf = tail call float @fabsf(float %mul)
+define float @replace_fabs_call_f32(float %x) {
+  %fabsf = tail call float @fabsf(float %x)
  ret float %fabsf

-; CHECK-LABEL: square_fabs_call_f32(
-; CHECK-NEXT: %mul = fmul float %x, %x
-; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul)
+; CHECK-LABEL: @replace_fabs_call_f32(
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x)
 ; CHECK-NEXT: ret float %fabsf
 }

-define double @square_fabs_call_f64(double %x) {
-  %mul = fmul double %x, %x
-  %fabs = tail call double @fabs(double %mul)
+define double @replace_fabs_call_f64(double %x) {
+  %fabs = tail call double @fabs(double %x)
  ret double %fabs

-; CHECK-LABEL: square_fabs_call_f64(
-; CHECK-NEXT: %mul = fmul double %x, %x
-; CHECK-NEXT: %fabs = tail call double @fabs(double %mul)
+; CHECK-LABEL: @replace_fabs_call_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
 ; CHECK-NEXT: ret double %fabs
 }

-define fp128 @square_fabs_call_f128(fp128 %x) {
-  %mul = fmul fp128 %x, %x
-  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+define fp128 @replace_fabs_call_f128(fp128 %x) {
+  %fabsl = tail call fp128 @fabsl(fp128 %x)
  ret fp128 %fabsl

-; CHECK-LABEL: square_fabs_call_f128(
-; CHECK-NEXT: %mul = fmul fp128 %x, %x
-; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul)
+; CHECK-LABEL: replace_fabs_call_f128(
+; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x)
 ; CHECK-NEXT: ret fp128 %fabsl
 }

+; Make sure fast math flags are preserved when replacing the libcall.
+define float @fmf_replace_fabs_call_f32(float %x) {
+  %fabsf = tail call nnan float @fabsf(float %x)
+  ret float %fabsf
+
+; CHECK-LABEL: @fmf_replace_fabs_call_f32(
+; CHECK-NEXT: %fabsf = call nnan float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: ret float %fabsf
+}
+
 ; Make sure all intrinsic calls are eliminated when the input is known
 ; positive.

-declare float @llvm.fabs.f32(float)
-declare double @llvm.fabs.f64(double)
-declare fp128 @llvm.fabs.f128(fp128)
-
 ; The fabs cannot be eliminated because %x may be a NaN
 define float @square_fabs_intrinsic_f32(float %x) {
  %mul = fmul float %x, %x
@ -102,10 +106,8 @@ define float @square_fabs_shrink_call1(float %x) {
  ret float %trunc

 ; CHECK-LABEL: square_fabs_shrink_call1(
-; CHECK-NEXT: %ext = fpext float %x to double
-; CHECK-NEXT: %sq = fmul double %ext, %ext
-; CHECK-NEXT: call double @fabs(double %sq)
-; CHECK-NEXT: %trunc = fptrunc double %fabs to float
+; CHECK-NEXT: fmul float %x, %x
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float
 ; CHECK-NEXT: ret float %trunc
 }

@ -118,8 +120,8 @@ define float @square_fabs_shrink_call2(float %x) {

 ; CHECK-LABEL: square_fabs_shrink_call2(
 ; CHECK-NEXT: %sq = fmul float %x, %x
-; CHECK-NEXT: %fabsf = call float @fabsf(float %sq)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq)
+; CHECK-NEXT: ret float %trunc
 }

 ; CHECK-LABEL: @fabs_select_constant_negative_positive(
@ -214,3 +216,16 @@ define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
 ; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
 ; CHECK-NEXT: ret float %fmuladd
 }
+
+; Don't introduce a second fpext
+; CHECK-LABEL: @multi_use_fabs_fpext(
+; CHECK: %fpext = fpext float %x to double
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext)
+; CHECK-NEXT: store volatile double %fpext, double* undef, align 8
+; CHECK-NEXT: ret double %fabs
+define double @multi_use_fabs_fpext(float %x) {
+  %fpext = fpext float %x to double
+  %fabs = call double @llvm.fabs.f64(double %fpext)
+  store volatile double %fpext, double* undef
+  ret double %fabs
+}
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@ -22,8 +22,20 @@ define i32 @test2(float %x, float %y) nounwind uwtable {
  %5 = zext i1 %4 to i32
  ret i32 %5
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float [[FABS]], %y
+}
+
+define i32 @fmf_test2(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call nnan double @fabs(double %1) nounwind readnone
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @fmf_test2(
+; CHECK-NEXT: [[FABS:%[0-9]+]] = call nnan float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float [[FABS]], %y
 }

 define i32 @test3(float %x, float %y) nounwind uwtable {
@ -99,15 +111,15 @@ define i32 @test8(float %x, float %y) nounwind uwtable {
 }

 define i32 @test9(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @fabs(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %fabs = call double @fabs(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %fabs
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %fabs, %y
 }

 define i32 @test10(float %x, float %y) nounwind uwtable {
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@ -72,7 +72,7 @@ define float @test_simplify7(float %x) {
 ; CHECK-LABEL: @test_simplify7(
  %retval = call float @powf(float %x, float 0.5)
 ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]]
-; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
  ret float %retval
@ -83,7 +83,7 @@ define double @test_simplify8(double %x) {
 ; CHECK-LABEL: @test_simplify8(
  %retval = call double @pow(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
  ret double %retval
@ -163,7 +163,7 @@ define double @test_simplify17(double %x) {
 ; CHECK-LABEL: @test_simplify17(
  %retval = call double @llvm.pow.f64(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x)
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]])
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
  ret double %retval
--- a/test/Transforms/InstCombine/win-math.ll
+++ b/test/Transforms/InstCombine/win-math.ll
@ -284,11 +284,11 @@ define float @float_powsqrt(float %x) nounwind readnone {
 ; WIN64: float @powf
 ; MINGW32-LABEL: @float_powsqrt(
 ; MINGW32: float @sqrtf
-; MINGW32: float @fabsf
+; MINGW32: float @llvm.fabs.f32
 ; MINGW32-NOT: float @powf
 ; MINGW64-LABEL: @float_powsqrt(
 ; MINGW64: float @sqrtf
-; MINGW64: float @fabsf
+; MINGW64: float @llvm.fabs.f32(
 ; MINGW64-NOT: float @powf
    %1 = call float @powf(float %x, float 0.5)
    ret float %1
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@ -15,7 +15,7 @@ define double @test(double %X) {

 define double @test1(double %X) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[Y:%.*]] = call double @fabs(double %X)
+; CHECK-NEXT:    [[Y:%.*]] = call double @llvm.fabs.f64(double %X)
 ; CHECK-NEXT:    ret double [[Y]]
 ;
  %Y = call double @fabs(double %X)