From 7697aa15643a3f54dbfcd6bf49ebd9c95d7c9964 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 1 Nov 2018 06:47:01 +0000 Subject: [PATCH] [IndVars] Smart hard uses detection When rewriting loop exit values, IndVars considers this transform not profitable if the loop instruction has a loop user which it believes cannot be optimized away. In current implementation only calls that immediately use the instruction are considered as such. This patch extends the definition of "hard" users to any side-effecting instructions (which usually cannot be optimized away from the loop) and also allows handling of not just immediate users, but use chains. Differential Revision: https://reviews.llvm.org/D51584 Reviewed By: etherzhhb llvm-svn: 345814 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 39 +++++++++----- test/Analysis/ScalarEvolution/pr28705.ll | 6 +-- .../IndVarSimplify/dont-recompute.ll | 51 +++++++++++++++++++ .../IndVarSimplify/lrev-existing-umin.ll | 38 ++++++++++++++ 4 files changed, 118 insertions(+), 16 deletions(-) diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index ec51ad71abc..3e4e0f46ca3 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -145,6 +145,7 @@ class IndVarSimplify { bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); bool rewriteFirstIterationLoopExitValues(Loop *L); + bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); @@ -524,6 +525,29 @@ struct RewritePhi { // As a side effect, reduces the amount of IV processing within the loop. 
//===----------------------------------------------------------------------===// +bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { + SmallPtrSet<const Instruction *, 8> Visited; + SmallVector<const Instruction *, 8> WorkList; + Visited.insert(I); + WorkList.push_back(I); + while (!WorkList.empty()) { + const Instruction *Curr = WorkList.pop_back_val(); + // This use is outside the loop, nothing to do. + if (!L->contains(Curr)) + continue; + // Do we assume it is a "hard" use which will not be eliminated easily? + if (Curr->mayHaveSideEffects()) + return true; + // Otherwise, add all its users to worklist. + for (auto U : Curr->users()) { + auto *UI = cast<Instruction>(U); + if (Visited.insert(UI).second) + WorkList.push_back(UI); + } + } + return false; +} + /// Check to see if this loop has a computable loop-invariant execution count. /// If so, this means that we can compute the final value of any expressions /// that are recurrent in the loop, and substitute the exit values from the loop @@ -598,19 +622,8 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. 
- if (ExitValue->getSCEVType()>=scMulExpr) { - bool HasHardInternalUses = false; - for (auto *IB : Inst->users()) { - Instruction *UseInstr = cast(IB); - unsigned Opc = UseInstr->getOpcode(); - if (L->contains(UseInstr) && Opc == Instruction::Call) { - HasHardInternalUses = true; - break; - } - } - if (HasHardInternalUses) - continue; - } + if (hasHardUserWithinLoop(L, Inst)) + continue; bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); diff --git a/test/Analysis/ScalarEvolution/pr28705.ll b/test/Analysis/ScalarEvolution/pr28705.ll index 8fbc08e3ca6..9a8487a6c66 100644 --- a/test/Analysis/ScalarEvolution/pr28705.ll +++ b/test/Analysis/ScalarEvolution/pr28705.ll @@ -1,11 +1,11 @@ ; PR28705 ; RUN: opt < %s -indvars -S | FileCheck %s -; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i" -; with "%.sroa.speculated + 1". +; Check IndVarSimplify doesn't replace external use of the induction var +; "%inc.i.i" with "%.sroa.speculated + 1" because it is not profitable. 
; ; CHECK-LABEL: @foo( -; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 +; CHECK: %[[EXIT:.+]] = phi i32 [ %inc.i.i, %for.body650 ] ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] ; define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll index c87cd6596c6..22087710a9c 100644 --- a/test/Transforms/IndVarSimplify/dont-recompute.ll +++ b/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -123,3 +123,54 @@ for.end: ; preds = %for.body tail call void @func(i32 %soft_use) ret void } + +; CHECK-LABEL: @test5( +define void @test5(i32 %m) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m + %soft_use = add i32 %add, 123 +; CHECK: tail call void @func(i32 %soft_use) + tail call void @func(i32 %soft_use) + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: tail call void @func(i32 %add.lcssa) + tail call void @func(i32 %add) + ret void +} + +; CHECK-LABEL: @test6( +define void @test6(i32 %m, i32* %p) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m + %soft_use = add i32 %add, 123 +; CHECK: store i32 %soft_use, i32* %pidx + %pidx = getelementptr i32, i32* %p, i32 %add + store i32 %soft_use, i32* %pidx + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; 
CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: tail call void @func(i32 %add.lcssa) + tail call void @func(i32 %add) + ret void +} diff --git a/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/test/Transforms/IndVarSimplify/lrev-existing-umin.ll index 961c9fd944d..fff76675f17 100644 --- a/test/Transforms/IndVarSimplify/lrev-existing-umin.ll +++ b/test/Transforms/IndVarSimplify/lrev-existing-umin.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -indvars < %s | FileCheck %s +; Do not rewrite the user outside the loop because we must keep the instruction +; inside the loop due to store. Rewrite doesn't give us any profit. define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { ; CHECK-LABEL: @f( not_zero11.preheader: @@ -22,6 +24,42 @@ not_zero11: %tmp23 = icmp slt i32 %tmp22, %tmp14 br i1 %tmp23, label %not_zero11, label %main.exit.selector +main.exit.selector: +; CHECK-LABEL: main.exit.selector: +; CHECK: %tmp22.lcssa = phi i32 [ %tmp22, %not_zero11 ] +; CHECK: %tmp24 = icmp slt i32 %tmp22.lcssa, %length. + %tmp24 = icmp slt i32 %tmp22, %length.i + br i1 %tmp24, label %not_zero11.postloop, label %leave + +leave: + ret void + +not_zero11.postloop: + ret void +} + +; Rewrite the user outside the loop because there is no hard users inside the loop. 
+define void @f1(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { +; CHECK-LABEL: @f1( +not_zero11.preheader: + %tmp13 = icmp ugt i32 %length.i, %length.i.88 + %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i + %tmp15 = icmp sgt i32 %tmp14, 0 + br i1 %tmp15, label %not_zero11, label %not_zero11.postloop + +not_zero11: + %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ] + %tmp16 = zext i32 %v_1 to i64 + %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16 + %tmp18 = load i8, i8* %tmp17, align 1 + %tmp19 = zext i8 %tmp18 to i32 + %tmp20 = or i32 %tmp19, %tmp10 + %tmp21 = trunc i32 %tmp20 to i8 + %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16 + %tmp22 = add nuw nsw i32 %v_1, 1 + %tmp23 = icmp slt i32 %tmp22, %tmp14 + br i1 %tmp23, label %not_zero11, label %main.exit.selector + main.exit.selector: ; CHECK-LABEL: main.exit.selector: ; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i