mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 19:42:54 +02:00
[SCEV] SCEVExpander::isHighCostExpansionHelper(): cost-model plain UDiv
Summary: If we don't believe this UDiv is actually an LShr in disguise, things are much worse. First, we try to see if this UDiv actually originates from user code, by looking for `S + 1`, and, if found, consider this UDiv to be free. But otherwise, we always considered this UDiv to be high-cost. However, that is no longer the case with the TTI-driven cost model: our default budget is 4, which matches the default cost of UDiv, so now we allow a single UDiv to not be counted as high-cost. While that is the case, it is evident this is actually a regression due to the fact that cost-modelling is incomplete - we did not account for the `add`, `mul` costs yet. That is being addressed in D73728. Cost-modelling for UDiv also seems pretty straightforward: subtract the cost of the UDiv itself, and recurse into both the LHS and RHS. Reviewers: reames, mkazantsev, wmi, sanjoy Reviewed By: mkazantsev Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73722
This commit is contained in:
parent
5c8b9ed0bc
commit
9cb2cc0245
@ -2196,20 +2196,27 @@ bool SCEVExpander::isHighCostExpansionHelper(
|
||||
// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
|
||||
// HowManyLessThans produced to compute a precise expression, rather than a
|
||||
// UDiv from the user's code. If we can't find a UDiv in the code with some
|
||||
// simple searching, assume the former consider UDivExpr expensive to
|
||||
// compute.
|
||||
BasicBlock *ExitingBB = L->getExitingBlock();
|
||||
if (!ExitingBB)
|
||||
return true;
|
||||
// simple searching, we need to account for its cost.
|
||||
|
||||
// At the beginning of this function we already tried to find existing value
|
||||
// for plain 'S'. Now try to lookup 'S + 1' since it is common pattern
|
||||
// involving division. This is just a simple search heuristic.
|
||||
if (!At)
|
||||
At = &ExitingBB->back();
|
||||
if (!getRelatedExistingExpansion(
|
||||
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
|
||||
return true;
|
||||
BasicBlock *ExitingBB = L->getExitingBlock();
|
||||
if (At || ExitingBB) {
|
||||
if (!At)
|
||||
At = &ExitingBB->back();
|
||||
|
||||
// At the beginning of this function we already tried to find existing
|
||||
// value for plain 'S'. Now try to look up 'S + 1' since it is a common
|
||||
// pattern involving division. This is just a simple search heuristic.
|
||||
if (getRelatedExistingExpansion(
|
||||
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
|
||||
return false; // Consider it to be free.
|
||||
}
|
||||
|
||||
// Need to count the cost of this UDiv.
|
||||
BudgetRemaining -= TTI.getOperationCost(Instruction::UDiv, S->getType());
|
||||
return isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, BudgetRemaining,
|
||||
TTI, Processed) ||
|
||||
isHighCostExpansionHelper(UDivExpr->getRHS(), L, At, BudgetRemaining,
|
||||
TTI, Processed);
|
||||
}
|
||||
|
||||
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
|
||||
|
@ -19,6 +19,9 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
|
||||
; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11
|
||||
; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK: while.body.lr.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 12
|
||||
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
||||
; CHECK: while.body:
|
||||
; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
|
||||
@ -36,10 +39,10 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
|
||||
; CHECK: while.cond.while.end_crit_edge:
|
||||
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
|
||||
; CHECK-NEXT: br label [[WHILE_END]]
|
||||
; CHECK: while.end:
|
||||
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]])
|
||||
; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]])
|
||||
|
Loading…
Reference in New Issue
Block a user