1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[SCEV] rewriteLoopExitValues(): even if there are hard uses, still rewrite if cheap (PR44668)

Summary:
Replacing uses of the IV outside of the loop is likely generally useful,
but `rewriteLoopExitValues()` is cautious: if it isn't told to always
perform the replacement, and there are hard uses of the IV in the loop,
it doesn't perform the replacement.

In [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]],
that prevents `-indvars` from replacing uses of the induction variable
after the loop, which might be one of the optimization failures
preventing that code from being vectorized.

Instead, now that the cost model is fixed, I believe we should be
a little bit more optimistic, and also perform the replacement
if we believe it is within our budget.

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]].

Reviewers: reames, mkazantsev, asbirlea, fhahn, skatkov

Reviewed By: mkazantsev

Subscribers: nikic, hiraditya, zzheng, javed.absar, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73501
This commit is contained in:
Roman Lebedev 2020-02-25 21:53:00 +03:00
parent f3dbbac126
commit 845a27780f
6 changed files with 35 additions and 36 deletions

View File

@ -1353,16 +1353,16 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
// Computing the value outside of the loop brings no benefit if it is
// definitely used inside the loop in a way which can not be optimized
// away. Avoid doing so unless we know we have a value which computes
// the ExitValue already. TODO: This should be merged into SCEV
// expander to leverage its knowledge of existing expressions.
if (ReplaceExitValue != AlwaysRepl &&
!isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
// away. Avoid doing so unless either we know we have a value
// which computes the ExitValue already, or it is cheap to do so.
// TODO: This should be merged into SCEV expander to leverage
// its knowledge of existing expressions.
bool HighCost = Rewriter.isHighCostExpansion(
ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
if (ReplaceExitValue != AlwaysRepl && HighCost &&
hasHardUserWithinLoop(L, Inst))
continue;
bool HighCost = Rewriter.isHighCostExpansion(
ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "

View File

@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -indvars -S | FileCheck %s
; This tests that the IV is not recomputed outside of the loop when it is known
; to be computed by the loop and used in the loop any way. In the example below
; although a's value can be computed outside of the loop, there is no benefit
; in doing so as it has to be computed by the loop anyway.
; This tests that the IV is recomputed outside of the loop even when it is known
; to be computed by the loop and used in the loop anyway, if it is cheap to do
; so. In the example below the value can be computed outside of the loop,
; and we should do so because after that the IV is no longer used outside of
; the loop, which is likely beneficial for vectorization.
;
; extern void func(unsigned val);
;
@ -35,8 +36,8 @@ define void @test(i32 %m) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
@ -69,8 +70,8 @@ define i32 @test2(i32 %m) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
br label %for.body
@ -101,8 +102,8 @@ define void @test3(i32 %m) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
@ -141,8 +142,8 @@ define void @test4(i32 %m) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[ADD_LCSSA]], 123
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[TMP0]], 123
; CHECK-NEXT: tail call void @func(i32 [[SOFT_USE]])
; CHECK-NEXT: ret void
;
@ -178,8 +179,8 @@ define void @test5(i32 %m) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
@ -215,8 +216,8 @@ define void @test6(i32 %m, i32* %p) nounwind uwtable {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
; CHECK-NEXT: ret void
;
entry:

View File

@ -143,8 +143,7 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
; CHECK-NEXT: [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
; CHECK-NEXT: br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
; CHECK: innerexit:
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: br label [[OUTERMERGE]]
; CHECK: outermerge:
; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]

View File

@ -26,8 +26,7 @@ define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp
; CHECK-NEXT: [[TMP23:%.*]] = icmp slt i32 [[TMP22]], [[TMP14]]
; CHECK-NEXT: br i1 [[TMP23]], label [[NOT_ZERO11]], label [[MAIN_EXIT_SELECTOR:%.*]]
; CHECK: main.exit.selector:
; CHECK-NEXT: [[TMP22_LCSSA:%.*]] = phi i32 [ [[TMP22]], [[NOT_ZERO11]] ]
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP22_LCSSA]], [[LENGTH_I]]
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP14]], [[LENGTH_I]]
; CHECK-NEXT: br i1 [[TMP24]], label [[NOT_ZERO11_POSTLOOP]], label [[LEAVE:%.*]]
; CHECK: leave:
; CHECK-NEXT: ret void

View File

@ -16,14 +16,14 @@ define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
; CHECK: for.body650.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY650:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: [[INC_I_I_LCSSA:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[DOTSROA_SPECULATED]], 1
; CHECK-NEXT: br label [[XZ_EXIT]]
; CHECK: XZ.exit:
; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC_I_I_LCSSA]], [[LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP0]], [[LOOPEXIT:%.*]] ]
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: for.body650:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_BODY650_LR_PH]] ], [ [[INC655:%.*]], [[FOR_BODY650]] ]
; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I]], [[FOR_BODY650]] ]
; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
; CHECK-NEXT: [[ARRAYIDX_I_I1105:%.*]] = getelementptr inbounds i8, i8* [[REF_I1174:%.*]], i32 [[IV2]]
; CHECK-NEXT: store i8 7, i8* [[ARRAYIDX_I_I1105]], align 1
; CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[IV2]], 1

View File

@ -72,8 +72,8 @@ define i16 @dom_argument(i16 %arg1, i16 %arg2) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
; CHECK: loop2.end:
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG2]], 2
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
br label %loop1
@ -121,8 +121,8 @@ define i16 @dummy_phi_outside_loop(i16 %arg) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
; CHECK: loop2.end:
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[DUMMY]], 2
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
br label %loop2.preheader
@ -166,8 +166,8 @@ define i16 @neg_loop_carried(i16 %arg) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
; CHECK: loop2.end:
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 2
; CHECK-NEXT: ret i16 [[TMP1]]
;
entry:
br label %loop1