From 28625d59d9c131564d77d7e2896678bd6d9046b7 Mon Sep 17 00:00:00 2001 From: Chijun Sima Date: Sat, 4 Aug 2018 08:13:47 +0000 Subject: [PATCH] [TailCallElim] Preserve DT and PDT Summary: Previously, in the NewPM pipeline, TailCallElim recalculates the DomTree when it modifies any instruction in the Function. For example, ``` CallInst *CI = dyn_cast(&I); ... CI->setTailCall(); Modified = true; ... if (!Modified || ...) return PreservedAnalyses::all(); ``` After applying this patch, the DomTree only recalculates if needed (plus an extra insertEdge() + an extra deleteEdge() call). When optimizing SQLite with `-passes="default"` pipeline of the newPM, the number of DomTree recalculation decreases by 6.2%, the number of nodes visited by DFS decreases by 2.9%. The time used by DomTree will decrease approximately 1%~2.5% after applying the patch. Statistics: ``` Before the patch: 23010 dom-tree-stats - Number of DomTree recalculations 489264 dom-tree-stats - Number of nodes visited by DFS -- DomTree After the patch: 21581 dom-tree-stats - Number of DomTree recalculations 475088 dom-tree-stats - Number of nodes visited by DFS -- DomTree ``` Reviewers: kuhar, dmgreen, brzycki, grosser, davide Reviewed By: kuhar, brzycki Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D49982 llvm-svn: 338954 --- .../llvm/Transforms/Utils/BasicBlockUtils.h | 3 +- .../Scalar/TailRecursionElimination.cpp | 75 ++++++++++++------- lib/Transforms/Utils/BasicBlockUtils.cpp | 7 +- .../2010-06-26-MultipleReturnValues.ll | 2 +- test/Transforms/TailCallElim/EraseBB.ll | 2 +- .../TailCallElim/accum_recursion.ll | 4 +- test/Transforms/TailCallElim/ackermann.ll | 2 +- test/Transforms/TailCallElim/basic.ll | 2 +- test/Transforms/TailCallElim/deopt-bundle.ll | 2 +- .../TailCallElim/dont_reorder_load.ll | 2 +- test/Transforms/TailCallElim/dup_tail.ll | 2 +- test/Transforms/TailCallElim/inf-recursion.ll | 2 +- test/Transforms/TailCallElim/notail.ll | 2 +- .../TailCallElim/opt-remarks-recursion.ll | 2 +- test/Transforms/TailCallElim/reorder_load.ll | 2 +- test/Transforms/TailCallElim/setjmp.ll | 2 +- 16 files changed, 71 insertions(+), 42 deletions(-) diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 0d2f85ebf6d..63b5e6cb2ba 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -229,7 +229,8 @@ void SplitLandingPadPredecessors(BasicBlock *OrigBB, /// value defined by a PHI, propagate the right value into the return. It /// returns the new return instruction in the predecessor. ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, - BasicBlock *Pred); + BasicBlock *Pred, + DomTreeUpdater *DTU = nullptr); /// Split the containing block at the specified instruction - everything before /// SplitBefore stays in the old basic block, and the rest of the instructions diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 818e8373b1c..0b22f56edfa 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -61,6 +61,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -68,6 +69,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -488,12 +491,10 @@ static CallInst *findTRECandidate(Instruction *TI, return CI; } -static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, - BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool eliminateRecursiveTailCall( + CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -593,6 +594,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } + // The entry block was changed from OldEntry to NewEntry. + // The forward DominatorTree needs to be recalculated when the EntryBB is + // changed. In this corner-case we recalculate the entire tree. + DTU.recalculate(*NewEntry->getParent()); } // If this function has self recursive calls in the tail position where some @@ -668,6 +673,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. + DTU.insertEdge(BB, OldEntry); ++NumEliminated; return true; } @@ -676,7 +682,7 @@ static bool foldReturnAndProcessPred( BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) { + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { bool Change = false; // Make sure this block is a trivial return block. @@ -702,17 +708,17 @@ static bool foldReturnAndProcessPred( if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){ LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); + ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); // Cleanup: if all predecessors of BB have been eliminated by // FoldReturnIntoUncondBranch, delete it. It is important to empty it, // because the ret instruction in there is still using a value which // eliminateRecursiveTailCall will attempt to remove. if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - BB->eraseFromParent(); + DTU.deleteBB(BB); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); ++NumRetDuped; Change = true; } @@ -721,24 +727,23 @@ static bool foldReturnAndProcessPred( return Change; } -static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool processReturningBlock( + ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, + SmallVectorImpl &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); if (!CI) return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); } static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, + DomTreeUpdater &DTU) { if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") return false; @@ -773,11 +778,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall, - TTI, AA, ORE); + TTI, AA, ORE, DTU); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = foldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall, TTI, AA, ORE); + Change = foldReturnAndProcessPred( + BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall, TTI, AA, ORE, DTU); MadeChange |= Change; } } @@ -810,16 +815,27 @@ struct TailCallElim : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *PDTWP = getAnalysisIfAvailable(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + return eliminateTailRecursion( F, &getAnalysis().getTTI(F), &getAnalysis().getAAResults(), - &getAnalysis().getORE()); + &getAnalysis().getORE(), DTU); } }; } @@ -843,12 +859,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F, TargetTransformInfo &TTI = AM.getResult(F); AliasAnalysis &AA = AM.getResult(F); auto &ORE = AM.getResult(F); - - bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE); + auto *DT = AM.getCachedResult(F); + auto *PDT = AM.getCachedResult(F); + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); + PA.preserve(); return PA; } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 7f61477d961..94f37daa7e6 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -646,7 +646,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, } ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, - BasicBlock *Pred) { + BasicBlock *Pred, + DomTreeUpdater *DTU) { Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); @@ -680,6 +681,10 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // longer branch to them. BB->removePredecessor(Pred); UncondBranch->eraseFromParent(); + + if (DTU) + DTU->deleteEdge(Pred, BB); + return cast(NewRet); } diff --git a/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll b/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll index 053fc954361..48110e3283c 100644 --- a/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll +++ b/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; PR7328 ; PR7506 define i32 @foo(i32 %x) { diff --git a/test/Transforms/TailCallElim/EraseBB.ll b/test/Transforms/TailCallElim/EraseBB.ll index c8290d7ea95..b10d2ed65e5 100644 --- a/test/Transforms/TailCallElim/EraseBB.ll +++ b/test/Transforms/TailCallElim/EraseBB.ll @@ -1,4 +1,4 @@ -; RUN: opt -tailcallelim -S < %s 2>&1 | FileCheck %s +; RUN: opt -tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s ; CHECK: add nsw i32 ; CHECK-NEXT: br label diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll index 1175d581722..d56a1c2a7d3 100644 --- a/test/Transforms/TailCallElim/accum_recursion.ll +++ b/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s -; RUN: opt < %s -passes=tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s define i32 @test1_factorial(i32 %x) { entry: diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll index 83d98b84ea7..b6fbbf7d168 100644 --- a/test/Transforms/TailCallElim/ackermann.ll +++ b/test/Transforms/TailCallElim/ackermann.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; This function contains two tail calls, which should be eliminated -; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim" +; RUN: opt < %s -tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim" define i32 @Ack(i32 %M.1, i32 %N.1) { entry: diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll index b303fa743ca..6e32ada94cd 100644 --- a/test/Transforms/TailCallElim/basic.ll +++ b/test/Transforms/TailCallElim/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s declare void @noarg() declare void @use(i32*) diff --git a/test/Transforms/TailCallElim/deopt-bundle.ll b/test/Transforms/TailCallElim/deopt-bundle.ll index a98c47e9e6f..f651e462c1f 100644 --- a/test/Transforms/TailCallElim/deopt-bundle.ll +++ b/test/Transforms/TailCallElim/deopt-bundle.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s define i32 @f_1(i32 %x) { ; CHECK-LABEL: @f_1( diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll index f8542799cc6..b10936310a2 100644 --- a/test/Transforms/TailCallElim/dont_reorder_load.ll +++ b/test/Transforms/TailCallElim/dont_reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | grep call | count 4 +; RUN: opt < %s -tailcallelim -verify-dom-info -S | grep call | count 4 ; PR4323 ; Several cases where tail call elimination should not move the load above the diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll index f5b87f27644..36eb99a5440 100644 --- a/test/Transforms/TailCallElim/dup_tail.ll +++ b/test/Transforms/TailCallElim/dup_tail.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; Duplicate the return into if.end to enable TCE. -; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s ; CHECK: Number of return duplicated diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll index c121c25aee9..650ac48823d 100644 --- a/test/Transforms/TailCallElim/inf-recursion.ll +++ b/test/Transforms/TailCallElim/inf-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; Don't turn this into an infinite loop, this is probably the implementation ; of fabs and we expect the codegen to lower fabs. diff --git a/test/Transforms/TailCallElim/notail.ll b/test/Transforms/TailCallElim/notail.ll index e6fdbd1ec77..d84e1a1525f 100644 --- a/test/Transforms/TailCallElim/notail.ll +++ b/test/Transforms/TailCallElim/notail.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; CHECK: tail call void @callee0() ; CHECK: notail call void @callee1() diff --git a/test/Transforms/TailCallElim/opt-remarks-recursion.ll b/test/Transforms/TailCallElim/opt-remarks-recursion.ll index 5a9393fd251..b7b5bcbba6e 100644 --- a/test/Transforms/TailCallElim/opt-remarks-recursion.ll +++ b/test/Transforms/TailCallElim/opt-remarks-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -tailcallelim -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s +; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s ; RUN: opt %s -o /dev/null -passes='require,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll index 78621b105f5..c0a6ea00f71 100644 --- a/test/Transforms/TailCallElim/reorder_load.ll +++ b/test/Transforms/TailCallElim/reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; PR4323 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/TailCallElim/setjmp.ll b/test/Transforms/TailCallElim/setjmp.ll index 4ce6ac7965e..8af4bf13a11 100644 --- a/test/Transforms/TailCallElim/setjmp.ll +++ b/test/Transforms/TailCallElim/setjmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -S | FileCheck %s +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; Test that we don't tail call in a functions that calls returns_twice ; functions.