1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[SCEV] SCEVExpander::isHighCostExpansionHelper(): cost-model plain UDiv

Summary:
If we don't believe this UDiv is actually an LShr in disguise, things are much worse.
First, we try to see if this UDiv actually originates from user code,
by looking for `S + 1`, and, if found, consider this UDiv to be free.
But otherwise, we always considered this UDiv to be high-cost.

However, that is no longer the case with the TTI-driven cost model:
our default budget is 4, which matches the default cost of UDiv,
so now we allow a single UDiv to not be counted as high-cost.

While that is the case, it is evident this is actually a regression
due to the fact that cost-modelling is incomplete - we did not account
for the `add`, `mul` costs yet. That is being addressed in D73728.

Cost-modelling for UDiv also seems pretty straightforward:
subtract cost of the UDiv itself, and recurse into both the LHS and RHS.

Reviewers: reames, mkazantsev, wmi, sanjoy

Reviewed By: mkazantsev

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73722
This commit is contained in:
Roman Lebedev 2020-02-25 21:52:07 +03:00
parent 5c8b9ed0bc
commit 9cb2cc0245
2 changed files with 25 additions and 15 deletions

View File

@ -2196,20 +2196,27 @@ bool SCEVExpander::isHighCostExpansionHelper(
// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
// HowManyLessThans produced to compute a precise expression, rather than a
// UDiv from the user's code. If we can't find a UDiv in the code with some
// simple searching, assume the former consider UDivExpr expensive to
// compute.
BasicBlock *ExitingBB = L->getExitingBlock();
if (!ExitingBB)
return true;
// simple searching, we need to account for its cost.
// At the beginning of this function we already tried to find existing value
// for plain 'S'. Now try to lookup 'S + 1' since it is common pattern
// involving division. This is just a simple search heuristic.
if (!At)
At = &ExitingBB->back();
if (!getRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
BasicBlock *ExitingBB = L->getExitingBlock();
if (At || ExitingBB) {
if (!At)
At = &ExitingBB->back();
// At the beginning of this function we already tried to find existing
// value for plain 'S'. Now try to lookup 'S + 1' since it is common
// pattern involving division. This is just a simple search heuristic.
if (getRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return false; // Consider it to be free.
}
// Need to count the cost of this UDiv.
BudgetRemaining -= TTI.getOperationCost(Instruction::UDiv, S->getType());
return isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, BudgetRemaining,
TTI, Processed) ||
isHighCostExpansionHelper(UDivExpr->getRHS(), L, At, BudgetRemaining,
TTI, Processed);
}
// HowManyLessThans uses a Max expression whenever the loop is not guarded by

View File

@ -19,6 +19,9 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11
; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.body.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 12
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
@ -36,10 +39,10 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
; CHECK: while.cond.while.end_crit_edge:
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]])
; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]])