1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[CVP] @llvm.[us]{min,max}() intrinsics handling

If we can tell that one specific argument is always the one taken,
bypass the intrinsic and use that argument directly.

Notably, we are indeed fine with a non-strict predicate:
* UL: https://alive2.llvm.org/ce/z/69qVW9 https://alive2.llvm.org/ce/z/kNFTKf
      https://alive2.llvm.org/ce/z/AvaPw2 https://alive2.llvm.org/ce/z/oxo53i
* UG: https://alive2.llvm.org/ce/z/wxHeGH https://alive2.llvm.org/ce/z/Lf76qx
* SL: https://alive2.llvm.org/ce/z/hkeTGS https://alive2.llvm.org/ce/z/eR_b-W
* SG: https://alive2.llvm.org/ce/z/wEqRm7 https://alive2.llvm.org/ce/z/FpAsVr

Much like with all other comparison handling in CVP,
while we could sort-of handle two non-constant Values,
at least for plain ICmpInst it does not appear to be worthwhile.

This only fires 78 times on test-suite + dt + rs,
but we don't canonicalize to these yet. (only SCEV produces them)
This commit is contained in:
Roman Lebedev 2021-04-11 00:23:27 +03:00
parent 18840fafff
commit fcd012c5b0
4 changed files with 49 additions and 8 deletions

View File

@ -77,6 +77,14 @@ public:
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI, bool UseBlockValue);
/// Determine whether the specified value comparison is known to be true
/// or false at the specified instruction. While this takes two Values,
/// it still requires that one of them is a constant: a constant \p RHS is
/// queried directly, a constant \p LHS is queried with the predicate swapped,
/// and if neither operand is a constant the result is Unknown.
/// \p Pred is a CmpInst predicate.
/// If \p UseBlockValue is true, the block value is also taken into account.
Tristate getPredicateAt(unsigned Pred, Value *LHS, Value *RHS,
Instruction *CxtI, bool UseBlockValue);
/// Determine whether the specified value is known to be a constant at the
/// specified instruction. Return null if not.
Constant *getConstant(Value *V, Instruction *CxtI);

View File

@ -1812,6 +1812,24 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
return Unknown;
}
// Two-Value flavour of getPredicateAt(): normalizes the query so that the
// constant operand (if there is one) ends up on the right-hand side, then
// defers to the Value-vs-Constant overload.
LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS,
                                                      Value *RHS,
                                                      Instruction *CxtI,
                                                      bool UseBlockValue) {
  // Constant already on the right: forward the query unchanged.
  if (auto *RHSC = dyn_cast<Constant>(RHS))
    return getPredicateAt(P, LHS, RHSC, CxtI, UseBlockValue);

  // Constant on the left: "C pred V" is the same question as "V pred' C",
  // where pred' is the operand-swapped predicate.
  if (auto *LHSC = dyn_cast<Constant>(LHS)) {
    const CmpInst::Predicate Swapped =
        CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(P));
    return getPredicateAt(Swapped, RHS, LHSC, CxtI, UseBlockValue);
  }

  // Neither side is a Constant. Comparing the two constant ranges via
  // ConstantRange::icmp() would be possible, but so far it has not appeared
  // to be profitable, so give up.
  return LazyValueInfo::Unknown;
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
if (PImpl) {

View File

@ -87,6 +87,7 @@ STATISTIC(NumOverflows, "Number of overflow checks removed");
STATISTIC(NumSaturating,
"Number of saturating arithmetics converted to normal arithmetics");
STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
namespace {
@ -499,6 +500,19 @@ static void processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
processBinOp(BO, LVI);
}
// See if this min/max intrinsic always picks it's one specific operand.
static void processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate());
LazyValueInfo::Tristate Result = LVI->getPredicateAt(
Pred, MM->getLHS(), MM->getRHS(), MM, /*UseBlockValue=*/true);
if (Result == LazyValueInfo::Unknown)
return;
++NumMinMax;
MM->replaceAllUsesWith(MM->getOperand(!Result));
MM->eraseFromParent();
}
// Rewrite this with.overflow intrinsic as non-overflowing.
static void processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
IRBuilder<> B(WO);
@ -550,6 +564,11 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
return true;
}
if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
processMinMaxIntrinsic(MM, LVI);
return true;
}
if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
processOverflowIntrinsic(WO, LVI);

View File

@ -60,8 +60,7 @@ define i8 @test5(i8 %x) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[LIM:%.*]] = icmp ule i8 [[X:%.*]], 42
; CHECK-NEXT: call void @llvm.assume(i1 [[LIM]])
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 42)
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 [[X]]
;
%lim = icmp ule i8 %x, 42
call void @llvm.assume(i1 %lim)
@ -109,8 +108,7 @@ define i8 @test9(i8 %x) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[LIM:%.*]] = icmp uge i8 [[X:%.*]], 42
; CHECK-NEXT: call void @llvm.assume(i1 [[LIM]])
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 42)
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 [[X]]
;
%lim = icmp uge i8 %x, 42
call void @llvm.assume(i1 %lim)
@ -158,8 +156,7 @@ define i8 @test13(i8 %x) {
; CHECK-LABEL: @test13(
; CHECK-NEXT: [[LIM:%.*]] = icmp sle i8 [[X:%.*]], 42
; CHECK-NEXT: call void @llvm.assume(i1 [[LIM]])
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 42)
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 [[X]]
;
%lim = icmp sle i8 %x, 42
call void @llvm.assume(i1 %lim)
@ -207,8 +204,7 @@ define i8 @test17(i8 %x) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[LIM:%.*]] = icmp sge i8 [[X:%.*]], 42
; CHECK-NEXT: call void @llvm.assume(i1 [[LIM]])
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 42)
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 [[X]]
;
%lim = icmp sge i8 %x, 42
call void @llvm.assume(i1 %lim)