1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

SimplifyLibCalls: Replace fabs libcalls with intrinsics

Add the missing fabs(fpext) optimization that worked with the call,
and also fix it creating a second fpext when there were multiple
uses.

llvm-svn: 292172
This commit is contained in:
Matt Arsenault 2017-01-17 00:10:40 +00:00
parent 9226f41ce8
commit ccfb3dd68e
11 changed files with 158 additions and 67 deletions

View File

@ -826,6 +826,18 @@ inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::SIToFP>(Op);
}
/// \brief Build a matcher for an FPTrunc cast; \p Op is the sub-matcher
/// applied to the cast's operand.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
  using FPTruncMatcher = CastClass_match<OpTy, Instruction::FPTrunc>;
  return FPTruncMatcher(Op);
}
/// \brief Build a matcher for an FPExt cast; \p Op is the sub-matcher
/// applied to the cast's operand.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
  using FPExtMatcher = CastClass_match<OpTy, Instruction::FPExt>;
  return FPExtMatcher(Op);
}
//===----------------------------------------------------------------------===//
// Matchers for unary operators
//

View File

@ -1631,6 +1631,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return SelectInst::Create(Cond, Call0, Call1);
}
// Operand of the fpext that feeds this fabs; bound by the match below.
Value *ExtSrc;
// Only fold when the fpext has no other users. If it had, the original fpext
// would have to stay alive and this rewrite would add a second one.
if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
II->getArgOperand(0)->hasOneUse()) {
// fabs (fpext x) -> fpext (fabs x)
// Get the llvm.fabs declaration overloaded on the pre-extension type.
Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs,
{ ExtSrc->getType() });
CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
// Carry the original call's fast-math flags and name over to the new fabs.
NewFabs->copyFastMathFlags(II);
NewFabs->takeName(II);
// The returned fpext converts the result back to the original fabs type.
return new FPExtInst(NewFabs, II->getType());
}
break;
}
case Intrinsic::cos:

View File

@ -1392,21 +1392,24 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// If the value being truncated is produced by an intrinsic call, try to push
// the truncation through the intrinsic.
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::fabs: {
    // (fptrunc (fabs x)) -> (fabs (fptrunc x))
    Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
                                               CI.getType());

    // Re-declare the intrinsic overloaded on the truncated type.
    Type *IntrinsicType[] = { CI.getType() };
    Function *Overload = Intrinsic::getDeclaration(
        CI.getModule(), II->getIntrinsicID(), IntrinsicType);

    // Keep any operand bundles attached to the original call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    II->getOperandBundlesAsDefs(OpBundles);

    Value *Args[] = { InnerTrunc };
    CallInst *NewCI = CallInst::Create(Overload, Args,
                                       OpBundles, II->getName());
    // Copy the fast-math flags before returning so the narrowed fabs keeps
    // the flags of the call it replaces.
    NewCI->copyFastMathFlags(II);
    return NewCI;
  }
  }
}

View File

@ -1210,11 +1210,15 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
/// Replace a call to a fabs library function (fabs/fabsf/fabsl) with a call
/// to the llvm.fabs intrinsic overloaded on the call's result type.
///
/// \param CI the library call being simplified.
/// \param B  builder used to emit the replacement call.
/// \returns the new intrinsic call, which takes over the name of \p CI.
Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();

  // Emit the replacement under the libcall's fast-math flags so they are
  // preserved on the intrinsic. The guard is scoped to this function
  // (presumably restoring the builder's previous flags on exit).
  IRBuilder<>::FastMathFlagGuard Guard(B);
  B.setFastMathFlags(CI->getFastMathFlags());

  // fabs/fabsf -> llvm.fabs.*
  Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs,
                                       CI->getType());
  Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) });
  NewCall->takeName(CI);
  return NewCall;
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
@ -2029,8 +2033,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizePow(CI, Builder);
case Intrinsic::exp2:
return optimizeExp2(CI, Builder);
case Intrinsic::fabs:
return optimizeFabs(CI, Builder);
case Intrinsic::log:
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:

View File

@ -10,7 +10,8 @@
; DO-SIMPLIFY: call float @roundf(
; DO-SIMPLIFY: call float @nearbyintf(
; DO-SIMPLIFY: call float @truncf(
; DO-SIMPLIFY: call float @fabsf(
; DO-SIMPLIFY: call float @llvm.fabs.f32(
; DO-SIMPLIFY: call fast float @llvm.fabs.f32(
; C89-SIMPLIFY: call float @floorf(
; C89-SIMPLIFY: call float @ceilf(
@ -22,7 +23,10 @@
; DONT-SIMPLIFY: call double @round(
; DONT-SIMPLIFY: call double @nearbyint(
; DONT-SIMPLIFY: call double @trunc(
; DONT-SIMPLIFY: call double @fabs(
; This is replaced with the intrinsic, which does the right thing on
; all platforms.
; DONT-SIMPLIFY: call float @llvm.fabs.f32(
declare double @floor(double)
declare double @ceil(double)
@ -30,6 +34,7 @@ declare double @round(double)
declare double @nearbyint(double)
declare double @trunc(double)
declare double @fabs(double)
declare double @llvm.fabs.f64(double)
define float @test_floor(float %C) {
%D = fpext float %C to double
@ -78,3 +83,12 @@ define float @test_fabs(float %C) {
%F = fptrunc double %E to float
ret float %F
}
; Make sure fast math flags are preserved
define float @test_fabs_fast(float %C) {
%D = fpext float %C to double
; --> fabsf
%E = call fast double @fabs(double %D)
%F = fptrunc double %E to float
ret float %F
}

View File

@ -0,0 +1,21 @@
; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s
declare x86_fp80 @fabsl(x86_fp80)
; CHECK-LABEL: @replace_fabs_call_f80(
; CHECK-NEXT: %fabsl = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
; CHECK-NEXT: ret x86_fp80 %fabsl
define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) {
%fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x)
ret x86_fp80 %fabsl
}
; CHECK-LABEL: @fmf_replace_fabs_call_f80(
; CHECK-NEXT: %fabsl = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
; CHECK-NEXT: ret x86_fp80 %fabsl
define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) {
%fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x)
ret x86_fp80 %fabsl
}

View File

@ -1,6 +1,10 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s
; Make sure all library calls are eliminated when the input is known positive.
; Make sure libcalls are replaced with intrinsic calls.
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
declare float @fabsf(float)
declare double @fabs(double)
@ -8,46 +12,46 @@ declare fp128 @fabsl(fp128)
declare float @llvm.fma.f32(float, float, float)
declare float @llvm.fmuladd.f32(float, float, float)
define float @square_fabs_call_f32(float %x) {
%mul = fmul float %x, %x
%fabsf = tail call float @fabsf(float %mul)
define float @replace_fabs_call_f32(float %x) {
%fabsf = tail call float @fabsf(float %x)
ret float %fabsf
; CHECK-LABEL: square_fabs_call_f32(
; CHECK-NEXT: %mul = fmul float %x, %x
; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul)
; CHECK-LABEL: @replace_fabs_call_f32(
; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabsf
}
define double @square_fabs_call_f64(double %x) {
%mul = fmul double %x, %x
%fabs = tail call double @fabs(double %mul)
define double @replace_fabs_call_f64(double %x) {
%fabs = tail call double @fabs(double %x)
ret double %fabs
; CHECK-LABEL: square_fabs_call_f64(
; CHECK-NEXT: %mul = fmul double %x, %x
; CHECK-NEXT: %fabs = tail call double @fabs(double %mul)
; CHECK-LABEL: @replace_fabs_call_f64(
; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}
define fp128 @square_fabs_call_f128(fp128 %x) {
%mul = fmul fp128 %x, %x
%fabsl = tail call fp128 @fabsl(fp128 %mul)
define fp128 @replace_fabs_call_f128(fp128 %x) {
%fabsl = tail call fp128 @fabsl(fp128 %x)
ret fp128 %fabsl
; CHECK-LABEL: square_fabs_call_f128(
; CHECK-NEXT: %mul = fmul fp128 %x, %x
; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul)
; CHECK-LABEL: replace_fabs_call_f128(
; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabsl
}
; Make sure fast math flags are preserved when replacing the libcall.
define float @fmf_replace_fabs_call_f32(float %x) {
%fabsf = tail call nnan float @fabsf(float %x)
ret float %fabsf
; CHECK-LABEL: @fmf_replace_fabs_call_f32(
; CHECK-NEXT: %fabsf = call nnan float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabsf
}
; Make sure all intrinsic calls are eliminated when the input is known
; positive.
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
; The fabs cannot be eliminated because %x may be a NaN
define float @square_fabs_intrinsic_f32(float %x) {
%mul = fmul float %x, %x
@ -102,10 +106,8 @@ define float @square_fabs_shrink_call1(float %x) {
ret float %trunc
; CHECK-LABEL: square_fabs_shrink_call1(
; CHECK-NEXT: %ext = fpext float %x to double
; CHECK-NEXT: %sq = fmul double %ext, %ext
; CHECK-NEXT: call double @fabs(double %sq)
; CHECK-NEXT: %trunc = fptrunc double %fabs to float
; CHECK-NEXT: fmul float %x, %x
; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float
; CHECK-NEXT: ret float %trunc
}
@ -118,8 +120,8 @@ define float @square_fabs_shrink_call2(float %x) {
; CHECK-LABEL: square_fabs_shrink_call2(
; CHECK-NEXT: %sq = fmul float %x, %x
; CHECK-NEXT: %fabsf = call float @fabsf(float %sq)
; CHECK-NEXT: ret float %fabsf
; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq)
; CHECK-NEXT: ret float %trunc
}
; CHECK-LABEL: @fabs_select_constant_negative_positive(
@ -214,3 +216,16 @@ define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
; CHECK-NEXT: ret float %fmuladd
}
; Don't introduce a second fpext
; CHECK-LABEL: @multi_use_fabs_fpext(
; CHECK: %fpext = fpext float %x to double
; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext)
; CHECK-NEXT: store volatile double %fpext, double* undef, align 8
; CHECK-NEXT: ret double %fabs
define double @multi_use_fabs_fpext(float %x) {
%fpext = fpext float %x to double
%fabs = call double @llvm.fabs.f64(double %fpext)
store volatile double %fpext, double* undef
ret double %fabs
}

View File

@ -22,8 +22,20 @@ define i32 @test2(float %x, float %y) nounwind uwtable {
%5 = zext i1 %4 to i32
ret i32 %5
; CHECK-LABEL: @test2(
; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
; CHECK-NEXT: fcmp oeq float %fabsf, %y
; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x)
; CHECK-NEXT: fcmp oeq float [[FABS]], %y
}
define i32 @fmf_test2(float %x, float %y) nounwind uwtable {
%1 = fpext float %x to double
%2 = call nnan double @fabs(double %1) nounwind readnone
%3 = fpext float %y to double
%4 = fcmp oeq double %2, %3
%5 = zext i1 %4 to i32
ret i32 %5
; CHECK-LABEL: @fmf_test2(
; CHECK-NEXT: [[FABS:%[0-9]+]] = call nnan float @llvm.fabs.f32(float %x)
; CHECK-NEXT: fcmp oeq float [[FABS]], %y
}
define i32 @test3(float %x, float %y) nounwind uwtable {
@ -99,15 +111,15 @@ define i32 @test8(float %x, float %y) nounwind uwtable {
}
define i32 @test9(float %x, float %y) nounwind uwtable {
%1 = fpext float %y to double
%2 = fpext float %x to double
%3 = call double @fabs(double %2) nounwind readnone
%4 = fcmp oeq double %1, %3
%5 = zext i1 %4 to i32
ret i32 %5
%x.ext = fpext float %x to double
%y.ext = fpext float %y to double
%fabs = call double @fabs(double %x.ext) nounwind readnone
%cmp = fcmp oeq double %y.ext, %fabs
%cmp.ext = zext i1 %cmp to i32
ret i32 %cmp.ext
; CHECK-LABEL: @test9(
; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
; CHECK-NEXT: fcmp oeq float %fabsf, %y
; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
; CHECK-NEXT: fcmp oeq float %fabs, %y
}
define i32 @test10(float %x, float %y) nounwind uwtable {

View File

@ -72,7 +72,7 @@ define float @test_simplify7(float %x) {
; CHECK-LABEL: @test_simplify7(
%retval = call float @powf(float %x, float 0.5)
; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]]
; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]])
; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
ret float %retval
@ -83,7 +83,7 @@ define double @test_simplify8(double %x) {
; CHECK-LABEL: @test_simplify8(
%retval = call double @pow(double %x, double 0.5)
; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
ret double %retval
@ -163,7 +163,7 @@ define double @test_simplify17(double %x) {
; CHECK-LABEL: @test_simplify17(
%retval = call double @llvm.pow.f64(double %x, double 0.5)
; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x)
; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]])
; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
ret double %retval

View File

@ -284,11 +284,11 @@ define float @float_powsqrt(float %x) nounwind readnone {
; WIN64: float @powf
; MINGW32-LABEL: @float_powsqrt(
; MINGW32: float @sqrtf
; MINGW32: float @fabsf
; MINGW32: float @llvm.fabs.f32
; MINGW32-NOT: float @powf
; MINGW64-LABEL: @float_powsqrt(
; MINGW64: float @sqrtf
; MINGW64: float @fabsf
; MINGW64: float @llvm.fabs.f32(
; MINGW64-NOT: float @powf
%1 = call float @powf(float %x, float 0.5)
ret float %1

View File

@ -15,7 +15,7 @@ define double @test(double %X) {
define double @test1(double %X) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[Y:%.*]] = call double @fabs(double %X)
; CHECK-NEXT: [[Y:%.*]] = call double @llvm.fabs.f64(double %X)
; CHECK-NEXT: ret double [[Y]]
;
%Y = call double @fabs(double %X)