mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-21 20:12:56 +02:00
transform fmin/fmax calls when possible (PR24314)
If we can ignore NaNs, fmin/fmax libcalls can become a compare and select (this is what we turn std::min / std::max into). This IR should then be optimized in the backend to whatever is best for any given target; e.g., x86 can use minss/maxss instructions. This should solve PR24314: https://llvm.org/bugs/show_bug.cgi?id=24314 Differential Revision: http://reviews.llvm.org/D11866 llvm-svn: 245187
This commit is contained in:
parent
7f59e40939
commit
138d4e067f
@ -131,6 +131,7 @@ private:
|
||||
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
|
||||
|
||||
|
@ -1184,6 +1184,60 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
|
||||
return Ret;
|
||||
}
|
||||
|
||||
/// Try to replace a call to one of the fmin/fmax library functions with an
/// ordered floating-point compare followed by a select.
///
/// The double-precision forms are first offered to optimizeBinaryDoubleFP so
/// the call can be narrowed to the float variant when the target library
/// provides it. Otherwise the call is only rewritten when the enclosing
/// function carries "unsafe-fp-math"="true" or "no-nans-fp-math"="true".
///
/// \param CI The call instruction being simplified.
/// \param B  Builder used to create the replacement instructions; its
///           fast-math flags are changed only for the duration of this call.
/// \returns The replacement value, or nullptr if no transform was performed.
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();
  const auto CalleeName = Callee->getName();

  // Shrinking a double call to its float counterpart takes priority over the
  // compare/select rewrite, provided the float function is available.
  const bool ShrinkableMin = CalleeName == "fmin" && TLI->has(LibFunc::fminf);
  const bool ShrinkableMax = CalleeName == "fmax" && TLI->has(LibFunc::fmaxf);
  if (ShrinkableMin || ShrinkableMax)
    if (Value *Shrunk = optimizeBinaryDoubleFP(CI, B))
      return Shrunk;

  // The prototype must be "T f(T, T)" with T a floating-point type.
  FunctionType *Sig = Callee->getFunctionType();
  if (Sig->getNumParams() != 2)
    return nullptr;
  auto *OperandTy = Sig->getParamType(0);
  const bool TypesAgree = Sig->getReturnType() == OperandTy &&
                          Sig->getParamType(1) == OperandTy;
  if (!TypesAgree || !OperandTy->isFloatingPointTy())
    return nullptr;

  // FIXME: Finer-grained optimization needs calls/intrinsics to carry the
  // same fast-math flags as FP instructions. Until then the function-level
  // attributes are the only way to learn that the call may be relaxed.
  IRBuilder<>::FastMathFlagGuard Guard(B);
  FastMathFlags FMF;
  Function *Caller = CI->getParent()->getParent();
  if (Caller->getFnAttribute("unsafe-fp-math").getValueAsString() == "true") {
    // Unsafe algebra turns on every fast-math flag.
    FMF.setUnsafeAlgebra();
  } else {
    // Without unsafe math, no-nans-fp-math is the minimum requirement.
    if (Caller->getFnAttribute("no-nans-fp-math").getValueAsString() != "true")
      return nullptr;
    // No-signed-zeros follows from the definition of fmax/fmin themselves:
    // "Ideally, fmax would be sensitive to the sign of zero, for example
    // fmax(-0.0, +0.0) would return +0; however, implementation in software
    // might be impractical."
    FMF.setNoSignedZeros();
    FMF.setNoNaNs();
  }
  B.SetFastMathFlags(FMF);

  // NaN handling can be ignored from here on. fmin/fmax do not set errno or
  // raise exceptions, so a plain compare and select is a valid replacement.
  Value *LHS = CI->getArgOperand(0);
  Value *RHS = CI->getArgOperand(1);
  Value *Cmp;
  if (CalleeName.startswith("fmin"))
    Cmp = B.CreateFCmpOLT(LHS, RHS);
  else
    Cmp = B.CreateFCmpOGT(LHS, RHS);
  return B.CreateSelect(Cmp, LHS, RHS);
}
|
||||
|
||||
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
|
||||
Function *Callee = CI->getCalledFunction();
|
||||
|
||||
@ -2110,11 +2164,16 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
|
||||
return optimizeUnaryDoubleFP(CI, Builder, true);
|
||||
return nullptr;
|
||||
case LibFunc::copysign:
|
||||
case LibFunc::fmin:
|
||||
case LibFunc::fmax:
|
||||
if (hasFloatVersion(FuncName))
|
||||
return optimizeBinaryDoubleFP(CI, Builder);
|
||||
return nullptr;
|
||||
case LibFunc::fminf:
|
||||
case LibFunc::fmin:
|
||||
case LibFunc::fminl:
|
||||
case LibFunc::fmaxf:
|
||||
case LibFunc::fmax:
|
||||
case LibFunc::fmaxl:
|
||||
return optimizeFMinFMax(CI, Builder);
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -716,3 +716,110 @@ define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
|
||||
; CHECK-NEXT: ret fp128 %fabs
|
||||
}
|
||||
|
||||
; =========================================================================
|
||||
;
|
||||
; Test-cases for fmin / fmax
|
||||
;
|
||||
; =========================================================================
|
||||
|
||||
declare double @fmax(double, double)
|
||||
declare double @fmin(double, double)
|
||||
declare float @fmaxf(float, float)
|
||||
declare float @fminf(float, float)
|
||||
declare fp128 @fmaxl(fp128, fp128)
|
||||
declare fp128 @fminl(fp128, fp128)
|
||||
|
||||
; No NaNs is the minimum requirement to replace these calls.
|
||||
; This should always be set when unsafe-fp-math is true, but
|
||||
; alternate the attributes for additional test coverage.
|
||||
; 'nsz' is implied by the definition of fmax or fmin itself.
|
||||
attributes #1 = { "no-nans-fp-math" = "true" }
|
||||
|
||||
; Shrink and remove the call.
|
||||
; fmax on float operands widened to double: the call is expected to be
; narrowed to float and then replaced by a compare and select.
; NOTE(review): attribute group #0 is defined outside this view; the 'fast'
; flag expected below presumably comes from "unsafe-fp-math"="true" - confirm.
define float @max1(float %a, float %b) #0 {
  %a.wide = fpext float %a to double
  %b.wide = fpext float %b to double
  %call = call double @fmax(double %a.wide, double %b.wide)
  %trunc = fptrunc double %call to float
  ret float %trunc

; CHECK-LABEL: max1(
; CHECK-NEXT: fcmp fast ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; fmaxf under attribute group #1 ("no-nans-fp-math"="true"): the call is
; expected to become an 'nnan nsz' compare and a select.
define float @max2(float %a, float %b) #1 {
  %call = call float @fmaxf(float %a, float %b)
  ret float %call

; CHECK-LABEL: max2(
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}
|
||||
|
||||
|
||||
; fmax on genuine double operands: no narrowing is expected, only the
; compare/select replacement.
; NOTE(review): attribute group #0 is defined outside this view; the 'fast'
; flag expected below presumably comes from "unsafe-fp-math"="true" - confirm.
define double @max3(double %a, double %b) #0 {
  %call = call double @fmax(double %a, double %b)
  ret double %call

; CHECK-LABEL: max3(
; CHECK-NEXT: fcmp fast ogt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; fmaxl on fp128 under attribute group #1 ("no-nans-fp-math"="true"): the
; call is expected to become an 'nnan nsz' compare and a select.
define fp128 @max4(fp128 %a, fp128 %b) #1 {
  %call = call fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %call

; CHECK-LABEL: max4(
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; Shrink and remove the call.
|
||||
; fmin on float operands widened to double under attribute group #1
; ("no-nans-fp-math"="true"): the call is expected to be narrowed to float
; and then replaced by an 'nnan nsz' compare and a select.
define float @min1(float %a, float %b) #1 {
  %a.wide = fpext float %a to double
  %b.wide = fpext float %b to double
  %call = call double @fmin(double %a.wide, double %b.wide)
  %trunc = fptrunc double %call to float
  ret float %trunc

; CHECK-LABEL: min1(
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; fminf on float operands: expected to become a compare and a select.
; NOTE(review): attribute group #0 is defined outside this view; the 'fast'
; flag expected below presumably comes from "unsafe-fp-math"="true" - confirm.
define float @min2(float %a, float %b) #0 {
  %call = call float @fminf(float %a, float %b)
  ret float %call

; CHECK-LABEL: min2(
; CHECK-NEXT: fcmp fast olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; fmin on genuine double operands under attribute group #1
; ("no-nans-fp-math"="true"): expected to become an 'nnan nsz' compare and a
; select, with no narrowing.
define double @min3(double %a, double %b) #1 {
  %call = call double @fmin(double %a, double %b)
  ret double %call

; CHECK-LABEL: min3(
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}
|
||||
|
||||
; fminl on fp128 operands: expected to become a compare and a select.
; NOTE(review): attribute group #0 is defined outside this view; the 'fast'
; flag expected below presumably comes from "unsafe-fp-math"="true" - confirm.
define fp128 @min4(fp128 %a, fp128 %b) #0 {
  %call = call fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %call

; CHECK-LABEL: min4(
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}
|
||||
|
Loading…
Reference in New Issue
Block a user