From 162ffcacd64a99c077c8d678c6908b9b787f1275 Mon Sep 17 00:00:00 2001
From: Daniel Jasper
Date: Mon, 19 Dec 2016 08:22:17 +0000
Subject: [PATCH] Revert @llvm.assume with operator bundles (r289755-r289757)

This creates non-linear behavior in the inliner (see more details in
r289755's commit thread).

llvm-svn: 290086
---
 docs/LangRef.rst | 9 -
 include/llvm/Analysis/BasicAliasAnalysis.h | 17 +-
 include/llvm/Analysis/CodeMetrics.h | 5 +-
 include/llvm/Analysis/DemandedBits.h | 6 +-
 include/llvm/Analysis/IVUsers.h | 6 +-
 include/llvm/Analysis/InlineCost.h | 4 +
 include/llvm/Analysis/InstructionSimplify.h | 41 ++-
 include/llvm/Analysis/LazyValueInfo.h | 9 +-
 .../llvm/Analysis/MemoryDependenceAnalysis.h | 6 +-
 include/llvm/Analysis/PHITransAddr.h | 8 +-
 include/llvm/Analysis/ScalarEvolution.h | 13 +-
 include/llvm/Analysis/ValueTracking.h | 19 +-
 include/llvm/InitializePasses.h | 1 +
 include/llvm/Transforms/IPO/InlinerPass.h | 2 +
 .../Scalar/AlignmentFromAssumptions.h | 3 +-
 include/llvm/Transforms/Scalar/GVN.h | 4 +-
 .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +
 .../llvm/Transforms/Scalar/NaryReassociate.h | 4 +-
 include/llvm/Transforms/Scalar/SROA.h | 10 +-
 include/llvm/Transforms/Utils/Cloning.h | 9 +-
 include/llvm/Transforms/Utils/Local.h | 7 +-
 include/llvm/Transforms/Utils/LoopSimplify.h | 3 +-
 include/llvm/Transforms/Utils/LoopUtils.h | 1 +
 include/llvm/Transforms/Utils/MemorySSA.h | 2 +-
 .../llvm/Transforms/Utils/PromoteMemToReg.h | 4 +-
 include/llvm/Transforms/Utils/UnrollLoop.h | 3 +-
 .../llvm/Transforms/Vectorize/LoopVectorize.h | 4 +-
 .../llvm/Transforms/Vectorize/SLPVectorizer.h | 4 +-
 lib/Analysis/BasicAliasAnalysis.cpp | 49 +--
 lib/Analysis/CMakeLists.txt | 1 +
 lib/Analysis/CodeMetrics.cpp | 43 ++-
 lib/Analysis/DemandedBits.cpp | 13 +-
 lib/Analysis/IVUsers.cpp | 17 +-
 lib/Analysis/InlineCost.cpp | 41 ++-
 lib/Analysis/InstructionSimplify.cpp | 282 +++++++++---------
 lib/Analysis/LazyValueInfo.cpp | 48 +--
 lib/Analysis/Lint.cpp | 20 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp | 11 +-
 lib/Analysis/PHITransAddr.cpp | 6 +-
 lib/Analysis/ScalarEvolution.cpp | 121 +++----
 lib/Analysis/ScalarEvolutionExpander.cpp | 2 +-
 lib/Analysis/ValueTracking.cpp | 231 +++++++-------
 lib/Passes/PassBuilder.cpp | 1 +
 lib/Passes/PassRegistry.def | 2 +
 lib/Transforms/IPO/AlwaysInliner.cpp | 2 +
 lib/Transforms/IPO/ArgumentPromotion.cpp | 3 +
 lib/Transforms/IPO/FunctionAttrs.cpp | 3 +
 lib/Transforms/IPO/InlineSimple.cpp | 8 +-
 lib/Transforms/IPO/Inliner.cpp | 11 +-
 lib/Transforms/IPO/PartialInlining.cpp | 24 +-
 lib/Transforms/IPO/SampleProfile.cpp | 26 +-
 .../InstCombine/InstCombineAddSub.cpp | 10 +-
 .../InstCombine/InstCombineAndOrXor.cpp | 18 +-
 .../InstCombine/InstCombineCalls.cpp | 94 +-----
 .../InstCombine/InstCombineCompares.cpp | 6 +-
 .../InstCombine/InstCombineInternal.h | 26 +-
 .../InstCombineLoadStoreAlloca.cpp | 6 +-
 .../InstCombine/InstCombineMulDivRem.cpp | 23 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp | 4 +-
 .../InstCombine/InstCombineSelect.cpp | 2 +-
 .../InstCombine/InstCombineShifts.cpp | 6 +-
 .../InstCombine/InstCombineVectorOps.cpp | 2 +-
 .../InstCombine/InstructionCombining.cpp | 29 +-
 .../Scalar/AlignmentFromAssumptions.cpp | 21 +-
 lib/Transforms/Scalar/EarlyCSE.cpp | 17 +-
 lib/Transforms/Scalar/GVN.cpp | 16 +-
 .../Scalar/InductiveRangeCheckElimination.cpp | 6 +-
 lib/Transforms/Scalar/LoopDataPrefetch.cpp | 17 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp | 16 +-
 lib/Transforms/Scalar/LoopInterchange.cpp | 1 +
 lib/Transforms/Scalar/LoopRotation.cpp | 26 +-
 lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 1 +
 lib/Transforms/Scalar/LoopUnrollPass.cpp | 22 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp | 27 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19 +-
 lib/Transforms/Scalar/NaryReassociate.cpp | 19 +-
 .../Scalar/RewriteStatepointsForGC.cpp | 1 -
 lib/Transforms/Scalar/SROA.cpp | 15 +-
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 2 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp | 19 +-
 lib/Transforms/Utils/InlineFunction.cpp | 32 +-
 lib/Transforms/Utils/Local.cpp | 3 +-
 lib/Transforms/Utils/LoopSimplify.cpp | 33 +-
 lib/Transforms/Utils/LoopUnroll.cpp | 17 +-
 lib/Transforms/Utils/Mem2Reg.cpp | 15 +-
 .../Utils/PromoteMemoryToRegister.cpp | 13 +-
 lib/Transforms/Utils/SimplifyCFG.cpp | 75 ++---
 lib/Transforms/Utils/SimplifyInstructions.cpp | 14 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 +-
 .../Vectorize/LoadStoreVectorizer.cpp | 6 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp | 49 ++-
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 24 +-
 .../no-wrap-unknown-becount.ll | 4 +-
 .../ScalarEvolution/nsw-offset-assume.ll | 2 +-
 .../CorrelatedValuePropagation/conflict.ll | 4 +-
 .../InstCombine/assume-redundant.ll | 2 +-
 test/Transforms/InstCombine/assume.ll | 26 +-
 test/Transforms/InstCombine/assume2.ll | 22 +-
 test/Transforms/InstSimplify/add-mask.ll | 2 +-
 .../JumpThreading/assume-edge-dom.ll | 4 +-
 test/Transforms/JumpThreading/assume.ll | 4 +-
 test/Transforms/LoopRotate/basic.ll | 2 +-
 .../NaryReassociate/NVPTX/nary-gep.ll | 4 +-
 .../SimplifyCFG/switch-dead-default.ll | 8 +-
 unittests/Analysis/AliasAnalysisTest.cpp | 5 +-
 unittests/Analysis/ScalarEvolutionTest.cpp | 5 +-
 .../MCJIT/MCJITTestAPICommon.h | 4 +
 unittests/Transforms/Utils/MemorySSA.cpp | 5 +-
 108 files changed, 1177 insertions(+), 853 deletions(-)

diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 9186929bf12..4dd7157d04e 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1753,15 +1753,6 @@ site, these bundles may contain any values that are needed by the generated
 code. For more details, see :ref:`GC Transitions
 `.
 
-Affected Operand Bundles
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-Affected operand bundles are characterized by the ``"affected"`` operand bundle
-tag. These operand bundles indicate that a call, specifically a call to an
-intrinsic like ``llvm.assume``, implies some additional knowledge about the
-values within the bundle. This enables the optimizer to efficiently find these
-relationships. The optimizer will add these automatically.
-
 ..
_moduleasm: Module-Level Inline Assembly diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h index 4f9adedd3ed..f32f5660ea2 100644 --- a/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -26,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" namespace llvm { +class AssumptionCache; class DominatorTree; class LoopInfo; @@ -39,20 +41,21 @@ class BasicAAResult : public AAResultBase { const DataLayout &DL; const TargetLibraryInfo &TLI; + AssumptionCache &AC; DominatorTree *DT; LoopInfo *LI; public: BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, - DominatorTree *DT = nullptr, + AssumptionCache &AC, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) - : AAResultBase(), DL(DL), TLI(TLI), DT(DT), LI(LI) {} + : AAResultBase(), DL(DL), TLI(TLI), AC(AC), DT(DT), LI(LI) {} BasicAAResult(const BasicAAResult &Arg) - : AAResultBase(Arg), DL(Arg.DL), TLI(Arg.TLI), DT(Arg.DT), + : AAResultBase(Arg), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} BasicAAResult(BasicAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), + : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); @@ -142,11 +145,11 @@ private: static const Value * GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, unsigned &SExtBits, - const DataLayout &DL, unsigned Depth, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW); static bool DecomposeGEPExpression(const Value *V, DecomposedGEP &Decomposed, - const DataLayout &DL, DominatorTree *DT); + const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, @@ -163,7 +166,7 @@ private: bool constantOffsetHeuristic(const SmallVectorImpl &VarIndices, uint64_t V1Size, uint64_t V2Size, int64_t BaseOffset, - DominatorTree *DT); + AssumptionCache *AC, DominatorTree *DT); bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h index 49ffd8b1e11..9e861ac1882 100644 --- a/include/llvm/Analysis/CodeMetrics.h +++ b/include/llvm/Analysis/CodeMetrics.h @@ -20,6 +20,7 @@ #include "llvm/IR/CallSite.h" namespace llvm { +class AssumptionCache; class BasicBlock; class Loop; class Function; @@ -90,12 +91,12 @@ struct CodeMetrics { /// \brief Collect a loop's ephemeral values (those used only by an assume /// or similar intrinsics in the loop). - static void collectEphemeralValues(const Loop *L, + static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl &EphValues); /// \brief Collect a functions's ephemeral values (those used only by an /// assume or similar intrinsics in the function). 
- static void collectEphemeralValues(const Function *L, + static void collectEphemeralValues(const Function *L, AssumptionCache *AC, SmallPtrSetImpl &EphValues); }; diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index 95d698614bc..c603274a716 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -34,11 +34,12 @@ class FunctionPass; class Function; class Instruction; class DominatorTree; +class AssumptionCache; class DemandedBits { public: - DemandedBits(Function &F, DominatorTree &DT) : - F(F), DT(DT), Analyzed(false) {} + DemandedBits(Function &F, AssumptionCache &AC, DominatorTree &DT) : + F(F), AC(AC), DT(DT), Analyzed(false) {} /// Return the bits demanded from instruction I. APInt getDemandedBits(Instruction *I); @@ -50,6 +51,7 @@ public: private: Function &F; + AssumptionCache &AC; DominatorTree &DT; void performAnalysis(); diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index d6a0778d0eb..e1a5467d8b6 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -22,6 +22,7 @@ namespace llvm { +class AssumptionCache; class DominatorTree; class Instruction; class Value; @@ -93,6 +94,7 @@ private: class IVUsers { friend class IVStrideUse; Loop *L; + AssumptionCache *AC; LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; @@ -106,11 +108,11 @@ class IVUsers { SmallPtrSet EphValues; public: - IVUsers(Loop *L, LoopInfo *LI, DominatorTree *DT, + IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE); IVUsers(IVUsers &&X) - : L(std::move(X.L)), DT(std::move(X.DT)), + : L(std::move(X.L)), AC(std::move(X.AC)), DT(std::move(X.DT)), SE(std::move(X.SE)), Processed(std::move(X.Processed)), IVUses(std::move(X.IVUses)), EphValues(std::move(X.EphValues)) { for (IVStrideUse &U : IVUses) diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index e632e83d296..5e7b00261f6 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -15,10 +15,12 @@ #define LLVM_ANALYSIS_INLINECOST_H #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/AssumptionCache.h" #include #include namespace llvm { +class AssumptionCacheTracker; class CallSite; class DataLayout; class Function; @@ -168,6 +170,7 @@ InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, ProfileSummaryInfo *PSI); /// \brief Get an InlineCost with the callee explicitly specified. @@ -178,6 +181,7 @@ getInlineCost(CallSite CS, const InlineParams &Params, InlineCost getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, ProfileSummaryInfo *PSI); /// \brief Minimal filter to detect invalid constructs for inlining. 
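For illustration only (this snippet is not part of the patch): the InlineCost.h hunks above restore an assumption-cache getter parameter to getInlineCost(), which legacy-PM callers typically build from the AssumptionCacheTracker analysis they now require. The wrapper name below is hypothetical; the getInlineCost() signature follows the restored declaration.

    // Illustrative sketch only -- not part of this patch.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/CallSite.h"
    using namespace llvm;

    static InlineCost computeInlineCostWithAC(CallSite CS,
                                              const InlineParams &Params,
                                              TargetTransformInfo &CalleeTTI,
                                              AssumptionCacheTracker &ACT,
                                              ProfileSummaryInfo *PSI) {
      // Wrap the tracker in the std::function that getInlineCost() expects.
      std::function<AssumptionCache &(Function &)> GetAssumptionCache =
          [&](Function &F) -> AssumptionCache & {
        return ACT.getAssumptionCache(F);
      };
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
    }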
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 9c0341b2f29..47d6118313c 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -37,6 +37,7 @@ namespace llvm { template class ArrayRef; + class AssumptionCache; class DominatorTree; class Instruction; class DataLayout; @@ -50,6 +51,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a Sub, fold the result or return null. @@ -57,6 +59,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FAdd, fold the result or return null. @@ -64,6 +67,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FSub, fold the result or return null. @@ -71,6 +75,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FMul, fold the result or return null. @@ -78,24 +83,28 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an SDiv, fold the result or return null. Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a UDiv, fold the result or return null. Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FDiv, fold the result or return null. @@ -103,18 +112,21 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an SRem, fold the result or return null. Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a URem, fold the result or return null. Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FRem, fold the result or return null. 
@@ -122,6 +134,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a Shl, fold the result or return null. @@ -129,6 +142,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a LShr, fold the result or return null. @@ -136,6 +150,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a AShr, fold the result or return nulll. @@ -143,24 +158,28 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an And, fold the result or return null. Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an Or, fold the result or return null. Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an Xor, fold the result or return null. Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an ICmpInst, fold the result or return null. @@ -168,6 +187,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FCmpInst, fold the result or return null. @@ -175,6 +195,7 @@ namespace llvm { FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a SelectInst, fold the result or return null. @@ -182,6 +203,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a GetElementPtrInst, fold the result or return null. @@ -189,6 +211,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an InsertValueInst, fold the result or return null. @@ -196,6 +219,7 @@ namespace llvm { ArrayRef Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an ExtractValueInst, fold the result or return null. 
@@ -203,6 +227,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an ExtractElementInst, fold the result or return null. @@ -210,6 +235,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a CastInst, fold the result or return null. @@ -217,6 +243,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); //=== Helper functions for higher up the class hierarchy. @@ -227,6 +254,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for a BinaryOperator, fold the result or return null. @@ -234,6 +262,7 @@ namespace llvm { const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given operands for an FP BinaryOperator, fold the result or return null. @@ -243,6 +272,7 @@ namespace llvm { const FastMathFlags &FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given a function and iterators over arguments, fold the result or return @@ -251,19 +281,22 @@ namespace llvm { User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// Given a function and set of arguments, fold the result or return null. Value *SimplifyCall(Value *V, ArrayRef Args, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); /// See if we can compute a simplified version of this instruction. If not, /// return null. Value *SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// @@ -274,7 +307,8 @@ namespace llvm { /// The function returns true if any simplifications were performed. bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); /// Recursively attempt to simplify an instruction. /// @@ -284,7 +318,8 @@ namespace llvm { /// performed. 
bool recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 6ac836b1db8..610791023a7 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -19,6 +19,7 @@ #include "llvm/Pass.h" namespace llvm { + class AssumptionCache; class Constant; class ConstantRange; class DataLayout; @@ -30,6 +31,7 @@ namespace llvm { /// This pass computes, caches, and vends lazy value constraint information. class LazyValueInfo { friend class LazyValueInfoWrapperPass; + AssumptionCache *AC = nullptr; class TargetLibraryInfo *TLI = nullptr; DominatorTree *DT = nullptr; void *PImpl = nullptr; @@ -38,15 +40,16 @@ class LazyValueInfo { public: ~LazyValueInfo(); LazyValueInfo() {} - LazyValueInfo(TargetLibraryInfo *TLI_, + LazyValueInfo(AssumptionCache *AC_, TargetLibraryInfo *TLI_, DominatorTree *DT_) - : TLI(TLI_), DT(DT_) {} + : AC(AC_), TLI(TLI_), DT(DT_) {} LazyValueInfo(LazyValueInfo &&Arg) - : TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { + : AC(Arg.AC), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { Arg.PImpl = nullptr; } LazyValueInfo &operator=(LazyValueInfo &&Arg) { releaseMemory(); + AC = Arg.AC; TLI = Arg.TLI; DT = Arg.DT; PImpl = Arg.PImpl; diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 465d7c28112..deb5b89c61b 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -30,6 +30,7 @@ class Function; class FunctionPass; class Instruction; class CallSite; +class AssumptionCache; class MemoryDependenceResults; class PredIteratorCache; class DominatorTree; @@ -338,15 +339,16 @@ private: /// Current AA implementation, just a cache. AliasAnalysis &AA; + AssumptionCache &AC; const TargetLibraryInfo &TLI; DominatorTree &DT; PredIteratorCache PredCache; public: - MemoryDependenceResults(AliasAnalysis &AA, + MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, const TargetLibraryInfo &TLI, DominatorTree &DT) - : AA(AA), TLI(TLI), DT(DT) {} + : AA(AA), AC(AC), TLI(TLI), DT(DT) {} /// Some methods limit the number of instructions they will examine. /// The return value of this method is the default limit that will be diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h index 997ee478ed2..f0f34f3a51f 100644 --- a/include/llvm/Analysis/PHITransAddr.h +++ b/include/llvm/Analysis/PHITransAddr.h @@ -18,6 +18,7 @@ #include "llvm/IR/Instruction.h" namespace llvm { + class AssumptionCache; class DominatorTree; class DataLayout; class TargetLibraryInfo; @@ -42,12 +43,15 @@ class PHITransAddr { /// TLI - The target library info if known, otherwise null. const TargetLibraryInfo *TLI; + /// A cache of @llvm.assume calls used by SimplifyInstruction. + AssumptionCache *AC; + /// InstInputs - The inputs for our symbolic address. SmallVector InstInputs; public: - PHITransAddr(Value *addr, const DataLayout &DL) - : Addr(addr), DL(DL), TLI(nullptr) { + PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC) + : Addr(addr), DL(DL), TLI(nullptr), AC(AC) { // If the address is an instruction, the whole thing is considered an input. 
if (Instruction *I = dyn_cast(Addr)) InstInputs.push_back(I); diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index a8afc0bef2c..9dcffe1ac5f 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -37,6 +37,7 @@ namespace llvm { class APInt; +class AssumptionCache; class Constant; class ConstantInt; class DominatorTree; @@ -474,6 +475,9 @@ private: /// TargetLibraryInfo &TLI; + /// The tracker for @llvm.assume intrinsics in this function. + AssumptionCache &AC; + /// The dominator tree. /// DominatorTree &DT; @@ -528,10 +532,6 @@ private: /// ValueExprMapType ValueExprMap; - /// This is a map of SCEVs to intrinsics (e.g. assumptions) that might affect - /// (i.e. imply something about) them. - DenseMap> AffectedMap; - /// Mark predicate values currently being processed by isImpliedCond. SmallPtrSet PendingLoopPredicates; @@ -800,9 +800,6 @@ private: ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, const SCEV *MaxBECount, unsigned BitWidth); - /// Add to the AffectedMap this SCEV if its operands are in the AffectedMap. - void addAffectedFromOperands(const SCEV *S); - /// We know that there is no SCEV for the specified value. Analyze the /// expression. const SCEV *createSCEV(Value *V); @@ -1110,7 +1107,7 @@ private: bool isAddRecNeverPoison(const Instruction *I, const Loop *L); public: - ScalarEvolution(Function &F, TargetLibraryInfo &TLI, + ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI); ~ScalarEvolution(); ScalarEvolution(ScalarEvolution &&Arg); diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 67263e24f44..5a3e6346ad8 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -24,6 +24,7 @@ namespace llvm { template class ArrayRef; class APInt; class AddOperator; + class AssumptionCache; class DataLayout; class DominatorTree; class GEPOperator; @@ -49,6 +50,7 @@ template class ArrayRef; /// for all of the elements in the vector. void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// Compute known bits from the range metadata. @@ -59,6 +61,7 @@ template class ArrayRef; /// Return true if LHS and RHS have no common bits set. bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -66,6 +69,7 @@ template class ArrayRef; /// wrapper around computeKnownBits. void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -76,6 +80,7 @@ template class ArrayRef; /// value is either a power of two or zero. bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero = false, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -84,30 +89,35 @@ template class ArrayRef; /// defined. Supports values with integer or pointer type and vectors of /// integers. 
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// Returns true if the give value is known to be non-negative. bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// Returns true if the given value is known be positive (i.e. non-negative /// and non-zero). bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// Returns true if the given value is known be negative (i.e. non-positive /// and non-zero). bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// Return true if the given values are known to be non-equal when defined. /// Supports scalar integer types only. bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -122,7 +132,7 @@ template class ArrayRef; /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, - unsigned Depth = 0, + unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -134,7 +144,7 @@ template class ArrayRef; /// sign bits for the vector element with the mininum number of known sign /// bits. unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, + unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -315,20 +325,24 @@ template class ArrayRef; OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT); OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT); OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); /// This version also leverages the sign bit of Add if known. 
OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); @@ -461,6 +475,7 @@ template class ArrayRef; const DataLayout &DL, bool InvertAPred = false, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); } // end namespace llvm diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 5c90519dfb4..acd99ef9986 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -68,6 +68,7 @@ void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); +void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeAtomicExpandPass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeBDCELegacyPassPass(PassRegistry &); diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index 402b1cb63ec..de5f5d84579 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -23,6 +23,7 @@ #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" namespace llvm { +class AssumptionCacheTracker; class CallSite; class DataLayout; class InlineCost; @@ -76,6 +77,7 @@ private: bool InsertLifetime; protected: + AssumptionCacheTracker *ACT; ProfileSummaryInfo *PSI; ImportedFunctionsInliningStatistics ImportedFunctionsStats; }; diff --git a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h index cbc4bc772cd..f75dc4dc331 100644 --- a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h +++ b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h @@ -30,7 +30,8 @@ struct AlignmentFromAssumptionsPass PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for old PM. - bool runImpl(Function &F, ScalarEvolution *SE_, DominatorTree *DT_); + bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_, + DominatorTree *DT_); // For memory transfers, we need a common alignment for both the source and // destination. 
If we have a new alignment for only one operand of a transfer diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h index ea7b88d62e7..8f05e8cdb23 100644 --- a/include/llvm/Transforms/Scalar/GVN.h +++ b/include/llvm/Transforms/Scalar/GVN.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/Dominators.h" @@ -108,6 +109,7 @@ private: MemoryDependenceResults *MD; DominatorTree *DT; const TargetLibraryInfo *TLI; + AssumptionCache *AC; SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; @@ -133,7 +135,7 @@ private: typedef SmallVector AvailValInBlkVect; typedef SmallVector UnavailBlkVect; - bool runImpl(Function &F, DominatorTree &RunDT, + bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, OptimizationRemarkEmitter *ORE); diff --git a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 2cd10e9a7d5..4308e44e7c4 100644 --- a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -32,6 +33,7 @@ class MemCpyOptPass : public PassInfoMixin { MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; std::function LookupAliasAnalysis; + std::function LookupAssumptionCache; std::function LookupDomTree; public: @@ -41,6 +43,7 @@ public: bool runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, std::function LookupAliasAnalysis_, + std::function LookupAssumptionCache_, std::function LookupDomTree_); private: diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h index 1f6f2f56b0b..a74bb6cc419 100644 --- a/include/llvm/Transforms/Scalar/NaryReassociate.h +++ b/include/llvm/Transforms/Scalar/NaryReassociate.h @@ -81,6 +81,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -94,7 +95,7 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for old PM. - bool runImpl(Function &F, DominatorTree *DT_, + bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_); @@ -151,6 +152,7 @@ private: // to be an index of GEP. 
bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); + AssumptionCache *AC; const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h index 91ecd2bba8a..3e93f46dd4e 100644 --- a/include/llvm/Transforms/Scalar/SROA.h +++ b/include/llvm/Transforms/Scalar/SROA.h @@ -17,14 +17,12 @@ #define LLVM_TRANSFORMS_SCALAR_SROA_H #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { -class AllocaInst; -class SelectInst; -class PHINode; /// A private "module" namespace for types and utilities used by SROA. These /// are implementation details and should not be used by clients. @@ -56,6 +54,7 @@ class SROALegacyPass; class SROA : public PassInfoMixin { LLVMContext *C; DominatorTree *DT; + AssumptionCache *AC; /// \brief Worklist of alloca instructions to simplify. /// @@ -100,7 +99,7 @@ class SROA : public PassInfoMixin { SetVector> SpeculatableSelects; public: - SROA() : C(nullptr), DT(nullptr) {} + SROA() : C(nullptr), DT(nullptr), AC(nullptr) {} /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -110,7 +109,8 @@ private: friend class sroa::SROALegacyPass; /// Helper used by both the public run method and by the legacy pass. - PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT); + PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC); bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index ebe4df88470..edba2bc66dd 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -45,6 +46,7 @@ class DataLayout; class Loop; class LoopInfo; class AllocaInst; +class AssumptionCacheTracker; class DominatorTree; /// Return an exact copy of the specified module @@ -174,12 +176,15 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, /// InlineFunction call, and records the auxiliary results produced by it. class InlineFunctionInfo { public: - explicit InlineFunctionInfo(CallGraph *cg = nullptr) - : CG(cg) {} + explicit InlineFunctionInfo(CallGraph *cg = nullptr, + std::function + *GetAssumptionCache = nullptr) + : CG(cg), GetAssumptionCache(GetAssumptionCache) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; + std::function *GetAssumptionCache; /// StaticAllocas - InlineFunction fills this in with all static allocas that /// get copied into the caller. 
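Similarly, the Cloning.h hunk above lets InlineFunctionInfo carry an optional pointer to the same kind of getter again. A minimal usage sketch under the same assumptions (not taken from the patch; helper name hypothetical):

    // Illustrative sketch only -- not part of this patch.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static bool inlineCallWithAC(CallSite CS, CallGraph &CG,
                                 AssumptionCacheTracker &ACT) {
      std::function<AssumptionCache &(Function &)> GetAssumptionCache =
          [&](Function &F) -> AssumptionCache & {
        return ACT.getAssumptionCache(F);
      };
      // The restored constructor takes the call graph plus a pointer to the
      // getter; either may be null when the caller has nothing to provide.
      InlineFunctionInfo IFI(&CG, &GetAssumptionCache);
      return InlineFunction(CS, IFI);
    }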
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index c85b982d94c..490a765c3fa 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -38,6 +38,7 @@ class LoadInst; class Value; class PHINode; class AllocaInst; +class AssumptionCache; class ConstantExpr; class DataLayout; class TargetLibraryInfo; @@ -136,7 +137,7 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB); /// parameter, providing the set of loop header that SimplifyCFG should not /// eliminate. bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, + unsigned BonusInstThreshold, AssumptionCache *AC = nullptr, SmallPtrSetImpl *LoopHeaders = nullptr); /// This function is used to flatten a CFG. For example, it uses parallel-and @@ -175,13 +176,15 @@ AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = nullptr); unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI = nullptr, + AssumptionCache *AC = nullptr, const DominatorTree *DT = nullptr); /// Try to infer an alignment for the specified pointer. static inline unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI = nullptr, + AssumptionCache *AC = nullptr, const DominatorTree *DT = nullptr) { - return getOrEnforceKnownAlignment(V, 0, DL, CxtI, DT); + return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT); } /// Given a getelementptr instruction/constantexpr, emit the code necessary to diff --git a/include/llvm/Transforms/Utils/LoopSimplify.h b/include/llvm/Transforms/Utils/LoopSimplify.h index 51c973c34d2..f3828bc16e2 100644 --- a/include/llvm/Transforms/Utils/LoopSimplify.h +++ b/include/llvm/Transforms/Utils/LoopSimplify.h @@ -39,6 +39,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H #define LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -57,7 +58,7 @@ public: /// it into a simplified loop nest with preheaders and single backedges. It will /// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null. 
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, - bool PreserveLCSSA); + AssumptionCache *AC, bool PreserveLCSSA); } // end namespace llvm diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 1041f24dc01..845069d4260 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -23,6 +23,7 @@ namespace llvm { class AliasSet; class AliasSetTracker; +class AssumptionCache; class BasicBlock; class DataLayout; class DominatorTree; diff --git a/include/llvm/Transforms/Utils/MemorySSA.h b/include/llvm/Transforms/Utils/MemorySSA.h index 49e6c42d4a9..fc392441494 100644 --- a/include/llvm/Transforms/Utils/MemorySSA.h +++ b/include/llvm/Transforms/Utils/MemorySSA.h @@ -963,7 +963,7 @@ private: if (WalkingPhi && Location.Ptr) { PHITransAddr Translator( const_cast(Location.Ptr), - OriginalAccess->getBlock()->getModule()->getDataLayout()); + OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), DefIterator.getPhiArgBlock(), nullptr, false)) diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h index e2a3ea1c646..b548072c413 100644 --- a/include/llvm/Transforms/Utils/PromoteMemToReg.h +++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -21,6 +21,7 @@ template class ArrayRef; class AllocaInst; class DominatorTree; class AliasSetTracker; +class AssumptionCache; /// \brief Return true if this alloca is legal for promotion. /// @@ -40,7 +41,8 @@ bool isAllocaPromotable(const AllocaInst *AI); /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. void PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - AliasSetTracker *AST = nullptr); + AliasSetTracker *AST = nullptr, + AssumptionCache *AC = nullptr); } // End llvm namespace diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index d629146ff4c..2ea28f2d4e1 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -23,6 +23,7 @@ namespace llvm { class StringRef; +class AssumptionCache; class DominatorTree; class Loop; class LoopInfo; @@ -36,7 +37,7 @@ bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, + ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA); bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h index d82701cf780..2efc7ca4f8a 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -51,6 +51,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/DemandedBits.h" @@ -82,6 +83,7 @@ struct LoopVectorizePass : public PassInfoMixin { TargetLibraryInfo *TLI; DemandedBits *DB; AliasAnalysis *AA; + AssumptionCache *AC; std::function *GetLAA; OptimizationRemarkEmitter *ORE; @@ -93,7 +95,7 @@ struct LoopVectorizePass : 
public PassInfoMixin { bool runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, - DemandedBits &DB_, AliasAnalysis &AA_, + DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, std::function &GetLAA_, OptimizationRemarkEmitter &ORE); diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 367c4319536..d669a8e5b61 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -21,6 +21,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -48,6 +49,7 @@ struct SLPVectorizerPass : public PassInfoMixin { AliasAnalysis *AA = nullptr; LoopInfo *LI = nullptr; DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; DemandedBits *DB = nullptr; const DataLayout *DL = nullptr; @@ -57,7 +59,7 @@ public: // Glue for old PM. bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, - DominatorTree *DT_, DemandedBits *DB_); + DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_); private: /// \brief Collect store and getelementptr instructions and organize them diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 55498432e11..50a43fe22e8 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -181,7 +182,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, /*static*/ const Value *BasicAAResult::GetLinearExpression( const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, unsigned &SExtBits, const DataLayout &DL, unsigned Depth, - DominatorTree *DT, bool &NSW, bool &NUW) { + AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -220,7 +221,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. 
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, BOp, DT)) { Scale = 1; Offset = 0; @@ -229,23 +230,23 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, LLVM_FALLTHROUGH; case Instruction::Add: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, - SExtBits, DL, Depth + 1, DT, NSW, NUW); + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); Offset += RHS; break; case Instruction::Sub: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, - SExtBits, DL, Depth + 1, DT, NSW, NUW); + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); Offset -= RHS; break; case Instruction::Mul: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, - SExtBits, DL, Depth + 1, DT, NSW, NUW); + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); Offset *= RHS; Scale *= RHS; break; case Instruction::Shl: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, - SExtBits, DL, Depth + 1, DT, NSW, NUW); + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); Offset <<= RHS.getLimitedValue(); Scale <<= RHS.getLimitedValue(); // the semantics of nsw and nuw for left shifts don't match those of @@ -272,7 +273,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits; const Value *Result = GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL, - Depth + 1, DT, NSW, NUW); + Depth + 1, AC, DT, NSW, NUW); // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this // by just incrementing the number of bits we've extended by. @@ -343,7 +344,7 @@ static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) { /// depth (MaxLookupSearchDepth). When DataLayout not is around, it just looks /// through pointer casts. bool BasicAAResult::DecomposeGEPExpression(const Value *V, - DecomposedGEP &Decomposed, const DataLayout &DL, + DecomposedGEP &Decomposed, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) { // Limit recursion depth to limit compile time in crazy cases. unsigned MaxLookup = MaxLookupSearchDepth; @@ -384,9 +385,10 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. if (const Instruction *I = dyn_cast(V)) - // TODO: Get a DominatorTree and use it here (it is now available in - // this function, but this should be updated when GetUnderlyingObject - // is updated). TLI should be provided also. + // TODO: Get a DominatorTree and AssumptionCache and use them here + // (these are both now available in this function, but this should be + // updated when GetUnderlyingObject is updated). TLI should be + // provided also. if (const Value *Simplified = SimplifyInstruction(const_cast(I), DL)) { V = Simplified; @@ -448,7 +450,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, APInt IndexScale(Width, 0), IndexOffset(Width, 0); bool NSW = true, NUW = true; Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, - SExtBits, DL, 0, DT, NSW, NUW); + SExtBits, DL, 0, AC, DT, NSW, NUW); // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. 
@@ -1057,9 +1059,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *UnderlyingV2) { DecomposedGEP DecompGEP1, DecompGEP2; bool GEP1MaxLookupReached = - DecomposeGEPExpression(GEP1, DecompGEP1, DL, DT); + DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT); bool GEP2MaxLookupReached = - DecomposeGEPExpression(V2, DecompGEP2, DL, DT); + DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT); int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset; int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset; @@ -1221,7 +1223,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, bool SignKnownZero, SignKnownOne; ComputeSignBit(const_cast(V), SignKnownZero, SignKnownOne, DL, - 0, nullptr, DT); + 0, &AC, nullptr, DT); // Zero-extension widens the variable, and so forces the sign // bit to zero. @@ -1256,7 +1258,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return NoAlias; if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size, - GEP1BaseOffset, DT)) + GEP1BaseOffset, &AC, DT)) return NoAlias; } @@ -1658,7 +1660,7 @@ void BasicAAResult::GetIndexDifference( bool BasicAAResult::constantOffsetHeuristic( const SmallVectorImpl &VarIndices, uint64_t V1Size, - uint64_t V2Size, int64_t BaseOffset, + uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC, DominatorTree *DT) { if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize || V2Size == MemoryLocation::UnknownSize) @@ -1681,11 +1683,11 @@ bool BasicAAResult::constantOffsetHeuristic( bool NSW = true, NUW = true; unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0; const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits, - V0SExtBits, DL, 0, DT, NSW, NUW); + V0SExtBits, DL, 0, AC, DT, NSW, NUW); NSW = true; NUW = true; const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits, - V1SExtBits, DL, 0, DT, NSW, NUW); + V1SExtBits, DL, 0, AC, DT, NSW, NUW); if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits || V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1)) @@ -1719,6 +1721,7 @@ AnalysisKey BasicAA::Key; BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { return BasicAAResult(F.getParent()->getDataLayout(), AM.getResult(F), + AM.getResult(F), &AM.getResult(F), AM.getCachedResult(F)); } @@ -1732,6 +1735,7 @@ void BasicAAWrapperPass::anchor() {} INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", "Basic Alias Analysis (stateless AA impl)", true, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", @@ -1742,12 +1746,13 @@ FunctionPass *llvm::createBasicAAWrapperPass() { } bool BasicAAWrapperPass::runOnFunction(Function &F) { + auto &ACT = getAnalysis(); auto &TLIWP = getAnalysis(); auto &DTWP = getAnalysis(); auto *LIWP = getAnalysisIfAvailable(); Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), - &DTWP.getDomTree(), + ACT.getAssumptionCache(F), &DTWP.getDomTree(), LIWP ? 
&LIWP->getLoopInfo() : nullptr)); return false; @@ -1755,6 +1760,7 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -1762,5 +1768,6 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( F.getParent()->getDataLayout(), - P.getAnalysis().getTLI()); + P.getAnalysis().getTLI(), + P.getAnalysis().getAssumptionCache(F)); } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 7175eb74904..08d50c29dfc 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAnalysis AliasAnalysisSummary.cpp AliasSetTracker.cpp Analysis.cpp + AssumptionCache.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp BlockFrequencyInfoImpl.cpp diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 1f7c6aa3568..bdffdd8eb27 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -70,31 +71,45 @@ static void completeEphemeralValues(SmallPtrSetImpl &Visited, // Find all ephemeral values. void CodeMetrics::collectEphemeralValues( - const Loop *L, SmallPtrSetImpl &EphValues) { + const Loop *L, AssumptionCache *AC, + SmallPtrSetImpl &EphValues) { SmallPtrSet Visited; SmallVector Worklist; - for (auto &B : L->blocks()) - for (auto &I : *B) - if (auto *II = dyn_cast(&I)) - if (II->getIntrinsicID() == Intrinsic::assume && - EphValues.insert(II).second) - appendSpeculatableOperands(II, Visited, Worklist); + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) + continue; + Instruction *I = cast(AssumeVH); + + // Filter out call sites outside of the loop so we don't do a function's + // worth of work for each of its loops (and, in the common case, ephemeral + // values in the loop are likely due to @llvm.assume calls in the loop). 
+ if (!L->contains(I->getParent())) + continue; + + if (EphValues.insert(I).second) + appendSpeculatableOperands(I, Visited, Worklist); + } completeEphemeralValues(Visited, Worklist, EphValues); } void CodeMetrics::collectEphemeralValues( - const Function *F, SmallPtrSetImpl &EphValues) { + const Function *F, AssumptionCache *AC, + SmallPtrSetImpl &EphValues) { SmallPtrSet Visited; SmallVector Worklist; - for (auto &B : *F) - for (auto &I : B) - if (auto *II = dyn_cast(&I)) - if (II->getIntrinsicID() == Intrinsic::assume && - EphValues.insert(II).second) - appendSpeculatableOperands(II, Visited, Worklist); + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) + continue; + Instruction *I = cast(AssumeVH); + assert(I->getParent()->getParent() == F && + "Found assumption for the wrong function!"); + + if (EphValues.insert(I).second) + appendSpeculatableOperands(I, Visited, Worklist); + } completeEphemeralValues(Visited, Worklist, EphValues); } diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index a52af4921c4..688c1db534c 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -44,6 +45,7 @@ using namespace llvm; char DemandedBitsWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits", "Demanded bits analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(DemandedBitsWrapperPass, "demanded-bits", "Demanded bits analysis", false, false) @@ -54,6 +56,7 @@ DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID) { void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } @@ -85,13 +88,13 @@ void DemandedBits::determineLiveOperandBits( KnownZero = APInt(BitWidth, 0); KnownOne = APInt(BitWidth, 0); computeKnownBits(const_cast(V1), KnownZero, KnownOne, DL, 0, - UserI, &DT); + &AC, UserI, &DT); if (V2) { KnownZero2 = APInt(BitWidth, 0); KnownOne2 = APInt(BitWidth, 0); computeKnownBits(const_cast(V2), KnownZero2, KnownOne2, DL, - 0, UserI, &DT); + 0, &AC, UserI, &DT); } }; @@ -245,8 +248,9 @@ void DemandedBits::determineLiveOperandBits( } bool DemandedBitsWrapperPass::runOnFunction(Function &F) { + auto &AC = getAnalysis().getAssumptionCache(F); auto &DT = getAnalysis().getDomTree(); - DB.emplace(F, DT); + DB.emplace(F, AC, DT); return false; } @@ -386,8 +390,9 @@ AnalysisKey DemandedBitsAnalysis::Key; DemandedBits DemandedBitsAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); - return DemandedBits(F, DT); + return DemandedBits(F, AC, DT); } PreservedAnalyses DemandedBitsPrinterPass::run(Function &F, diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index b365ed5af15..76e2561b9da 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopPassManager.h" @@ -40,7 +41,8 @@ IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM) { 
AM.getResult(L).getManager(); Function *F = L.getHeader()->getParent(); - return IVUsers(&L, FAM.getCachedResult(*F), + return IVUsers(&L, FAM.getCachedResult(*F), + FAM.getCachedResult(*F), FAM.getCachedResult(*F), FAM.getCachedResult(*F)); } @@ -53,6 +55,7 @@ PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM) { char IVUsersWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(IVUsersWrapperPass, "iv-users", "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) @@ -260,11 +263,12 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { return IVUses.back(); } -IVUsers::IVUsers(Loop *L, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE) - : L(L), LI(LI), DT(DT), SE(SE), IVUses() { +IVUsers::IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE) + : L(L), AC(AC), LI(LI), DT(DT), SE(SE), IVUses() { // Collect ephemeral values so that AddUsersIfInteresting skips them. EphValues.clear(); - CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for @@ -313,6 +317,7 @@ IVUsersWrapperPass::IVUsersWrapperPass() : LoopPass(ID) { } void IVUsersWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -320,11 +325,13 @@ void IVUsersWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool IVUsersWrapperPass::runOnLoop(Loop *L, LPPassManager &LPM) { + auto *AC = &getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); auto *LI = &getAnalysis().getLoopInfo(); auto *DT = &getAnalysis().getDomTree(); auto *SE = &getAnalysis().getSE(); - IU.reset(new IVUsers(L, LI, DT, SE)); + IU.reset(new IVUsers(L, AC, LI, DT, SE)); return false; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 7d4ad48ce3a..0228a1ba38f 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -68,6 +69,9 @@ class CallAnalyzer : public InstVisitor { /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; + /// Getter for the cache of @llvm.assume intrinsics. + std::function &GetAssumptionCache; + /// Profile summary information. 
ProfileSummaryInfo *PSI; @@ -197,19 +201,20 @@ class CallAnalyzer : public InstVisitor { public: CallAnalyzer(const TargetTransformInfo &TTI, + std::function &GetAssumptionCache, ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, const InlineParams &Params) - : TTI(TTI), PSI(PSI), F(Callee), CandidateCS(CSArg), Params(Params), - Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), - IsRecursiveCall(false), ExposesReturnsTwice(false), - HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee), + CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), + Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -957,7 +962,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, PSI, *F, CS, IndirectCallParams); + CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1313,7 +1318,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(&F, EphValues); + CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this @@ -1446,13 +1451,17 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, ProfileSummaryInfo *PSI) { - return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, PSI); + return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, + GetAssumptionCache, PSI); } InlineCost llvm::getInlineCost( CallSite CS, Function *Callee, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, ProfileSummaryInfo *PSI) { + TargetTransformInfo &CalleeTTI, + std::function &GetAssumptionCache, + ProfileSummaryInfo *PSI) { // Cannot inline indirect calls. 
if (!Callee) @@ -1486,7 +1495,7 @@ InlineCost llvm::getInlineCost( DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, PSI, *Callee, CS, Params); + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 5fd96d117a1..b4686a1ff17 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -50,11 +50,13 @@ struct Query { const DataLayout &DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; + AssumptionCache *AC; const Instruction *CxtI; Query(const DataLayout &DL, const TargetLibraryInfo *tli, - const DominatorTree *dt, const Instruction *cxti = nullptr) - : DL(DL), TLI(tli), DT(dt), CxtI(cxti) {} + const DominatorTree *dt, AssumptionCache *ac = nullptr, + const Instruction *cxti = nullptr) + : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} }; } // end anonymous namespace @@ -582,8 +584,9 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, CxtI), + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -688,7 +691,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.CxtI, Q.DT); + computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (KnownZero == ~APInt::getSignBit(BitWidth)) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. 
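
The InstructionSimplify.cpp changes that start just above and continue below all restore one mechanical pattern: the file-local Query struct carries an AssumptionCache pointer again, and every exported Simplify* wrapper forwards it into the recursive implementation. A condensed sketch of that shape (illustrative only; the full parameter lists and template arguments are as shown in the surrounding hunks):

  // Sketch of the restored pattern; not a literal excerpt of the patch.
  struct Query {
    const DataLayout &DL;
    const TargetLibraryInfo *TLI;
    const DominatorTree *DT;
    AssumptionCache *AC;        // restored by this revert
    const Instruction *CxtI;
  };

  Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                               const DataLayout &DL, const TargetLibraryInfo *TLI,
                               const DominatorTree *DT, AssumptionCache *AC,
                               const Instruction *CxtI) {
    return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
                             RecursionLimit);
  }
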
@@ -794,8 +797,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, CxtI), + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -962,35 +966,35 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1089,9 +1093,9 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1107,9 +1111,9 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1154,9 +1158,9 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1230,9 +1234,9 
@@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1248,9 +1252,9 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1276,9 +1280,9 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1346,7 +1350,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.CxtI, Q.DT); + computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (KnownOne.getLimitedValue() >= BitWidth) return UndefValue::get(Op0->getType()); @@ -1382,8 +1386,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); APInt Op0KnownZero(BitWidth, 0); APInt Op0KnownOne(BitWidth, 0); - computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.CxtI, - Q.DT); + computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC, + Q.CxtI, Q.DT); if (Op0KnownOne[0]) return Op0; } @@ -1412,9 +1416,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, CxtI), + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1437,9 +1441,9 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, CxtI), + return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1461,7 +1465,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, return X; // Arithmetic shifting an all-sign-bit value is a no-op. 
- unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.CxtI, Q.DT); + unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return Op0; @@ -1471,9 +1475,9 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, CxtI), + return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1655,9 +1659,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op0; - if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op1; } @@ -1722,9 +1728,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1904,10 +1910,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. @@ -1915,10 +1921,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. 
if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.CxtI, Q.DT)) + MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.CxtI, Q.DT)) + MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; } } @@ -1935,9 +1941,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1989,9 +1995,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, CxtI), + return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -2306,44 +2312,44 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.CxtI, Q.DT)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } @@ -2574,9 +2580,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, bool RHSKnownNonNegative, RHSKnownNegative; bool YKnownNonNegative, YKnownNegative; ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0, + Q.AC, Q.CxtI, Q.DT); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, 
YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); if (RHSKnownNonNegative && YKnownNegative) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); if (RHSKnownNegative || YKnownNonNegative) @@ -2594,9 +2600,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, bool LHSKnownNonNegative, LHSKnownNegative; bool YKnownNonNegative, YKnownNegative; ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, + Q.AC, Q.CxtI, Q.DT); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); if (LHSKnownNonNegative && YKnownNegative) return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); if (LHSKnownNegative || YKnownNonNegative) @@ -2652,8 +2658,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; LLVM_FALLTHROUGH; @@ -2663,8 +2669,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; LLVM_FALLTHROUGH; @@ -2683,8 +2689,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; LLVM_FALLTHROUGH; @@ -2694,8 +2700,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.CxtI, - Q.DT); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); if (!KnownNonNegative) break; LLVM_FALLTHROUGH; @@ -3220,7 +3226,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // icmp eq|ne X, Y -> false|true if X != Y if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && - isKnownNonEqual(LHS, RHS, Q.DL, Q.CxtI, Q.DT)) { + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { LLVMContext &Ctx = LHS->getType()->getContext(); return Pred == ICmpInst::ICMP_NE ? ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); @@ -3279,7 +3285,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, unsigned BitWidth = RHSVal->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0)) return Pred == ICmpInst::ICMP_EQ ? 
ConstantInt::getFalse(ITy) @@ -3305,9 +3311,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, CxtI), + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3438,10 +3444,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, - Query(DL, TLI, DT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// See if V simplifies when its operand Op is replaced with RepOp. @@ -3708,10 +3714,10 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifySelectInst(Cond, TrueVal, FalseVal, - Query(DL, TLI, DT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// Given operands for an GetElementPtrInst, see if we can fold the result. @@ -3827,10 +3833,10 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyGEPInst(SrcTy, Ops, - Query(DL, TLI, DT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. 
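
At the ValueTracking call sites in these hunks, the cache is passed as the argument immediately after the depth parameter. A typical restored call, with the Known* bookkeeping abbreviated (sketch, not a literal excerpt of the patch):

  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  computeKnownBits(LHS, KnownZero, KnownOne, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
  if (isKnownNonZero(LHS, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
    return getTrue(ITy);
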
@@ -3864,9 +3870,9 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, Value *llvm::SimplifyInsertValueInst( Value *Agg, Value *Val, ArrayRef Idxs, const DataLayout &DL, - const TargetLibraryInfo *TLI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, CxtI), + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3899,8 +3905,9 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, + AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, CxtI), + return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -3931,8 +3938,8 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, Value *llvm::SimplifyExtractElementInst( Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, const Instruction *CxtI) { - return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, CxtI), + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4006,9 +4013,9 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, CxtI), + return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4101,18 +4108,18 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, CxtI), + return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, const FastMathFlags &FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, CxtI), + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4126,9 +4133,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, CxtI), + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4328,24 +4335,24 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - 
const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, CxtI), + AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } Value *llvm::SimplifyCall(Value *V, ArrayRef Args, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, Args.begin(), Args.end(), - Query(DL, TLI, DT, CxtI), RecursionLimit); + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// See if we can compute a simplified version of this instruction. /// If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, AssumptionCache *AC) { Value *Result; switch (I->getOpcode()) { @@ -4354,137 +4361,137 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, I); + TLI, DT, AC, I); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, I); + TLI, DT, AC, I); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Mul: Result = - SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, I); + SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::SDiv: Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - I); + AC, I); break; case Instruction::UDiv: Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - I); + AC, I); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::SRem: Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - I); + AC, I); break; case Instruction::URem: Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - I); + AC, I); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, I); + TLI, DT, AC, I); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), DL, TLI, DT, - I); + AC, I); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), DL, TLI, DT, - I); + AC, I); break; case Instruction::And: Result = - SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, I); + 
SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::Or: Result = - SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, I); + SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::Xor: Result = - SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, I); + SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, I); + I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast(I)->getPredicate(), I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, I); + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT, I); + I->getOperand(2), DL, TLI, DT, AC, I); break; case Instruction::GetElementPtr: { SmallVector Ops(I->op_begin(), I->op_end()); Result = SimplifyGEPInst(cast(I)->getSourceElementType(), - Ops, DL, TLI, DT, I); + Ops, DL, TLI, DT, AC, I); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT, I); + IV->getIndices(), DL, TLI, DT, AC, I); break; } case Instruction::ExtractValue: { auto *EVI = cast(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), - EVI->getIndices(), DL, TLI, DT, I); + EVI->getIndices(), DL, TLI, DT, AC, I); break; } case Instruction::ExtractElement: { auto *EEI = cast(I); Result = SimplifyExtractElementInst( - EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, I); + EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), Query(DL, TLI, DT, I)); + Result = SimplifyPHINode(cast(I), Query(DL, TLI, DT, AC, I)); break; case Instruction::Call: { CallSite CS(cast(I)); Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL, - TLI, DT, I); + TLI, DT, AC, I); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), - DL, TLI, DT, I); + DL, TLI, DT, AC, I); break; } @@ -4494,7 +4501,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, unsigned BitWidth = I->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, I, DT); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); if ((KnownZero | KnownOne).isAllOnesValue()) Result = ConstantInt::get(I->getType(), KnownOne); } @@ -4518,7 +4525,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, + AssumptionCache *AC) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -4547,7 +4555,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. 
- SimpleV = SimplifyInstruction(I, DL, TLI, DT); + SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC); if (!SimpleV) continue; @@ -4573,14 +4581,16 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT); + const DominatorTree *DT, + AssumptionCache *AC) { + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, + AssumptionCache *AC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index c1561adbcbe..e51e8217360 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -41,6 +42,7 @@ using namespace PatternMatch; char LazyValueInfoWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -577,6 +579,7 @@ namespace { return true; } + AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. const DataLayout &DL; ///< A mandatory DataLayout DominatorTree *DT; ///< An optional DT pointer. @@ -635,8 +638,9 @@ namespace { /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - LazyValueInfoImpl(const DataLayout &DL, DominatorTree *DT = nullptr) - : DL(DL), DT(DT) {} + LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL, + DominatorTree *DT = nullptr) + : AC(AC), DL(DL), DT(DT) {} }; } // end anonymous namespace @@ -920,16 +924,14 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (!BBI) return; - for (auto *U : Val->users()) { - auto *II = dyn_cast(U); - if (!II) + for (auto &AssumeVH : AC->assumptions()) { + if (!AssumeVH) continue; - if (II->getIntrinsicID() != Intrinsic::assume) - continue; - if (!isValidAssumeForContext(II, BBI, DT)) + auto *I = cast(AssumeVH); + if (!isValidAssumeForContext(I, BBI, DT)) continue; - BBLV = intersect(BBLV, getValueFromCondition(Val, II->getArgOperand(0))); + BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); } // If guards are not used in the module, don't spend time looking for them @@ -1456,16 +1458,18 @@ void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, //===----------------------------------------------------------------------===// /// This lazily constructs the LazyValueInfoImpl. 
-static LazyValueInfoImpl &getImpl(void *&PImpl, const DataLayout *DL, +static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC, + const DataLayout *DL, DominatorTree *DT = nullptr) { if (!PImpl) { assert(DL && "getCache() called with a null DataLayout"); - PImpl = new LazyValueInfoImpl(*DL, DT); + PImpl = new LazyValueInfoImpl(AC, *DL, DT); } return *static_cast(PImpl); } bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { + Info.AC = &getAnalysis().getAssumptionCache(F); const DataLayout &DL = F.getParent()->getDataLayout(); DominatorTreeWrapperPass *DTWP = @@ -1474,7 +1478,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { Info.TLI = &getAnalysis().getTLI(); if (Info.PImpl) - getImpl(Info.PImpl, &DL, Info.DT).clear(); + getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); // Fully lazy. return false; @@ -1482,6 +1486,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { void LazyValueInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); } @@ -1492,7 +1497,7 @@ LazyValueInfo::~LazyValueInfo() { releaseMemory(); } void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { - delete &getImpl(PImpl, nullptr); + delete &getImpl(PImpl, AC, nullptr); PImpl = nullptr; } } @@ -1500,10 +1505,11 @@ void LazyValueInfo::releaseMemory() { void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); } LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { + auto &AC = FAM.getResult(F); auto &TLI = FAM.getResult(F); auto *DT = FAM.getCachedResult(F); - return LazyValueInfo(&TLI, DT); + return LazyValueInfo(&AC, &TLI, DT); } /// Returns true if we can statically tell that this value will never be a @@ -1528,7 +1534,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getImpl(PImpl, &DL, DT).getValueInBlock(V, BB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1546,7 +1552,7 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, unsigned Width = V->getType()->getIntegerBitWidth(); const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getImpl(PImpl, &DL, DT).getValueInBlock(V, BB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isUndefined()) return ConstantRange(Width, /*isFullSet=*/false); if (Result.isConstantRange()) @@ -1565,7 +1571,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getImpl(PImpl, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1653,7 +1659,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getImpl(PImpl, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } @@ -1673,7 +1679,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, return LazyValueInfo::True; } const DataLayout &DL = CxtI->getModule()->getDataLayout(); - LVILatticeVal Result = 
getImpl(PImpl, &DL, DT).getValueAt(V, CxtI); + LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI); Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); if (Ret != Unknown) return Ret; @@ -1763,13 +1769,13 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { if (PImpl) { const DataLayout &DL = PredBB->getModule()->getDataLayout(); - getImpl(PImpl, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); } } void LazyValueInfo::eraseBlock(BasicBlock *BB) { if (PImpl) { const DataLayout &DL = BB->getModule()->getDataLayout(); - getImpl(PImpl, &DL, DT).eraseBlock(BB); + getImpl(PImpl, AC, &DL, DT).eraseBlock(BB); } } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 75d03d18b89..2ca46b1fe87 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -40,6 +40,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -127,6 +128,7 @@ namespace { Module *Mod; const DataLayout *DL; AliasAnalysis *AA; + AssumptionCache *AC; DominatorTree *DT; TargetLibraryInfo *TLI; @@ -143,6 +145,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -182,6 +185,7 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -199,6 +203,7 @@ bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); DL = &F.getParent()->getDataLayout(); AA = &getAnalysis().getAAResults(); + AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); TLI = &getAnalysis().getTLI(); visit(F); @@ -520,7 +525,8 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT) { +static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, + AssumptionCache *AC) { // Assume undef could be zero. 
if (isa(V)) return true; @@ -529,7 +535,7 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT) { if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, + computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, dyn_cast(V), DT); return KnownZero.isAllOnesValue(); } @@ -560,22 +566,22 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT) { } void Lint::visitSDiv(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT), + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT), + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT), + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT), + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), "Undefined behavior: Division by zero", &I); } @@ -693,7 +699,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast(V)) { - if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT)) + if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) return findValueImpl(W, OffsetOk, Visited); } else if (auto *C = dyn_cast(V)) { if (Value *W = ConstantFoldConstant(C, *DL, TLI)) diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index dcca7bb7205..82a15a654f4 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" @@ -890,7 +891,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( return; } const DataLayout &DL = FromBB->getModule()->getDataLayout(); - PHITransAddr Address(const_cast(Loc.Ptr), DL); + PHITransAddr Address(const_cast(Loc.Ptr), DL, &AC); // This is the set of blocks we've inspected, and the pointer we consider in // each block. 
Because of critical edges, we currently bail out if querying @@ -1647,15 +1648,17 @@ AnalysisKey MemoryDependenceAnalysis::Key; MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult(F); + auto &AC = AM.getResult(F); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); - return MemoryDependenceResults(AA, TLI, DT); + return MemoryDependenceResults(AA, AC, TLI, DT); } char MemoryDependenceWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep", "Memory Dependence Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -1674,6 +1677,7 @@ void MemoryDependenceWrapperPass::releaseMemory() { void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); @@ -1685,8 +1689,9 @@ unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis().getAAResults(); + auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); - MemDep.emplace(AA, TLI, DT); + MemDep.emplace(AA, AC, TLI, DT); return false; } diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 6e9b9e10da4..84ecd4ab980 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Simplify the GEP to handle 'gep x, 0' -> x etc. if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), - GEPOps, DL, TLI, DT)) { + GEPOps, DL, TLI, DT, AC)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -276,7 +276,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -367,7 +367,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. 
- PHITransAddr Tmp(InVal, DL); + PHITransAddr Tmp(InVal, DL, AC); if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true)) return Tmp.getAddr(); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 3a99f2a2157..7962222a210 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -64,8 +64,8 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -1212,7 +1212,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -1599,7 +1598,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || - !AffectedMap.empty()) { + !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. Also, if the entry is guarded by // a comparison with the start value and the backedge is @@ -1665,7 +1664,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -1835,7 +1833,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || - !AffectedMap.empty()) { + !AC.assumptions().empty()) { // If the backedge is guarded by a comparison with the pre-inc // value the addrec is safe. 
Also, if the entry is guarded by // a comparison with the start value and the backedge is @@ -1893,7 +1891,6 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -2447,7 +2444,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); } S->setNoWrapFlags(Flags); return S; @@ -2740,7 +2736,6 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); } S->setNoWrapFlags(Flags); return S; @@ -2861,7 +2856,6 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -3042,7 +3036,6 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); } S->setNoWrapFlags(Flags); return S; @@ -3198,7 +3191,6 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -3300,7 +3292,6 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addAffectedFromOperands(S); return S; } @@ -3501,40 +3492,9 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { ExprValueMap[Stripped].insert({V, Offset}); } } - - // If this value is an instruction or an argument, and might be affected by - // an assumption, and its SCEV to the AffectedMap. - if (isa(V) || isa(V)) { - for (auto *U : V->users()) { - auto *II = dyn_cast(U); - if (!II) - continue; - if (II->getIntrinsicID() != Intrinsic::assume) - continue; - - AffectedMap[S].insert(II); - } - } - return S; } -// If one of this SCEV's operands is in the AffectedMap (meaning that it might -// be affected by an assumption), then this SCEV might be affected by the same -// assumption. -void ScalarEvolution::addAffectedFromOperands(const SCEV *S) { - if (auto *NS = dyn_cast(S)) - for (auto *Op : NS->operands()) { - auto AMI = AffectedMap.find(Op); - if (AMI == AffectedMap.end()) - continue; - - auto &ISet = AffectedMap[S]; - AMI = AffectedMap.find(Op); - ISet.insert(AMI->second.begin(), AMI->second.end()); - } -} - const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); @@ -4324,7 +4284,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT)) + if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -4512,7 +4472,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. 
unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, + computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, nullptr, &DT); return Zeros.countTrailingOnes(); } @@ -4683,14 +4643,14 @@ ScalarEvolution::getRange(const SCEV *S, if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, nullptr, &DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); if (Ones != ~Zeros + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, nullptr, &DT); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -5179,7 +5139,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(), - 0, nullptr, &DT); + 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -6374,7 +6334,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // bitwidth(K) iterations. Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); bool KnownZero, KnownOne; - ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, + ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, Predecessor->getTerminator(), &DT); auto *Ty = cast(RHS->getType()); if (KnownZero) @@ -7966,23 +7926,16 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics. - auto CheckAssumptions = [&](const SCEV *S) { - auto AMI = AffectedMap.find(S); - if (AMI != AffectedMap.end()) - for (auto *Assume : AMI->second) { - auto *CI = cast(Assume); - if (!DT.dominates(CI, Latch->getTerminator())) - continue; + for (auto &AssumeVH : AC.assumptions()) { + if (!AssumeVH) + continue; + auto *CI = cast(AssumeVH); + if (!DT.dominates(CI, Latch->getTerminator())) + continue; - if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) - return true; - } - - return false; - }; - - if (CheckAssumptions(LHS) || CheckAssumptions(RHS)) - return true; + if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + return true; + } // If the loop is not reachable from the entry block, we risk running into an // infinite loop as we walk up into the dom tree. These loops do not matter @@ -8067,23 +8020,16 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics. 
- auto CheckAssumptions = [&](const SCEV *S) { - auto AMI = AffectedMap.find(S); - if (AMI != AffectedMap.end()) - for (auto *Assume : AMI->second) { - auto *CI = cast(Assume); - if (!DT.dominates(CI, L->getHeader())) - continue; + for (auto &AssumeVH : AC.assumptions()) { + if (!AssumeVH) + continue; + auto *CI = cast(AssumeVH); + if (!DT.dominates(CI, L->getHeader())) + continue; - if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) - return true; - } - - return false; - }; - - if (CheckAssumptions(LHS) || CheckAssumptions(RHS)) - return true; + if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + return true; + } return false; } @@ -9536,8 +9482,9 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, - DominatorTree &DT, LoopInfo &LI) - : F(F), TLI(TLI), DT(DT), LI(LI), + AssumptionCache &AC, DominatorTree &DT, + LoopInfo &LI) + : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), CouldNotCompute(new SCEVCouldNotCompute()), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), @@ -9559,7 +9506,7 @@ ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, } ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) - : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), DT(Arg.DT), + : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), @@ -10030,7 +9977,7 @@ void ScalarEvolution::verify() const { // Gather stringified backedge taken counts for all loops using a fresh // ScalarEvolution object. 
- ScalarEvolution SE2(F, TLI, DT, LI); + ScalarEvolution SE2(F, TLI, AC, DT, LI); for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2); @@ -10071,6 +10018,7 @@ AnalysisKey ScalarEvolutionAnalysis::Key; ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, FunctionAnalysisManager &AM) { return ScalarEvolution(F, AM.getResult(F), + AM.getResult(F), AM.getResult(F), AM.getResult(F)); } @@ -10083,6 +10031,7 @@ ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution", "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -10097,6 +10046,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { SE.reset(new ScalarEvolution( F, getAnalysis().getTLI(), + getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), getAnalysis().getLoopInfo())); return false; @@ -10117,6 +10067,7 @@ void ScalarEvolutionWrapperPass::verifyAnalysis() const { void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index c558bba5740..d15a7dbd20e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1800,7 +1800,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // so narrow phis can reuse them. for (PHINode *Phi : Phis) { auto SimplifyPHINode = [&](PHINode *PN) -> Value * { - if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT)) + if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) return V; if (!SE.isSCEVable(PN->getType())) return nullptr; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 601fef85239..950a2fbcff9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Loads.h" @@ -73,6 +73,7 @@ namespace { // figuring out if we can use it. 
struct Query { const DataLayout &DL; + AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; @@ -88,11 +89,12 @@ struct Query { std::array Excluded; unsigned NumExcluded; - Query(const DataLayout &DL, const Instruction *CxtI, const DominatorTree *DT) - : DL(DL), CxtI(CxtI), DT(DT), NumExcluded(0) {} + Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), NumExcluded(0) {} Query(const Query &Q, const Value *NewExcl) - : DL(Q.DL), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) { + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); @@ -128,15 +130,15 @@ static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { ::computeKnownBits(V, KnownZero, KnownOne, Depth, - Query(DL, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, - const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); @@ -145,8 +147,8 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, IntegerType *IT = cast(LHS->getType()->getScalarType()); APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, CxtI, DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, CxtI, DT); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); } @@ -155,10 +157,10 @@ static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { ::ComputeSignBit(V, KnownZero, KnownOne, Depth, - Query(DL, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT)); } static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, @@ -166,51 +168,58 @@ static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, - unsigned Depth, const Instruction *CxtI, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(DL, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT)); } static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonZero(V, Depth, Query(DL, safeCxtI(V, CxtI), DT)); + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } bool 
llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, - unsigned Depth, const Instruction *CxtI, + unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, CxtI, DT); + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); return NonNegative; } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, const DominatorTree *DT) { + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { if (auto *CI = dyn_cast(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. - return isKnownNonNegative(V, DL, Depth, CxtI, DT) && - isKnownNonZero(V, DL, Depth, CxtI, DT); + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, const DominatorTree *DT) { + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, CxtI, DT); + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); return Negative; } static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q); bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, - const DataLayout &DL, const Instruction *CxtI, + const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonEqual(V1, V2, Query(DL, safeCxtI(V1, safeCxtI(V2, CxtI)), + return ::isKnownNonEqual(V1, V2, Query(DL, AC, + safeCxtI(V1, safeCxtI(V2, CxtI)), DT)); } @@ -219,19 +228,20 @@ static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, - unsigned Depth, const Instruction *CxtI, - const DominatorTree *DT) { + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { return ::MaskedValueIsZero(V, Mask, Depth, - Query(DL, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT)); } static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q); unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, - unsigned Depth, const Instruction *CxtI, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { - return ::ComputeNumSignBits(V, Depth, Query(DL, safeCxtI(V, CxtI), DT)); + return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, @@ -511,33 +521,36 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! 
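As a standalone illustration (not part of the patch) of the caller side of these reverted ValueTracking entry points: clients pass the AssumptionCache explicitly again instead of relying on the removed "affected" bundles. The helper function and its name are hypothetical; the isKnownNonZero call uses the parameter order restored by the hunks above.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: is V known to be non-zero at CxtI, given the
// function's assumption cache and dominator tree?
static bool exampleKnownNonZeroAt(const Value *V, const Instruction *CxtI,
                                  AssumptionCache &AC,
                                  const DominatorTree &DT) {
  const DataLayout &DL = CxtI->getModule()->getDataLayout();
  // With operand bundles reverted, the cache is an explicit parameter.
  return isKnownNonZero(V, DL, /*Depth=*/0, &AC, CxtI, &DT);
}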
- if (!Q.CxtI) + if (!Q.AC || !Q.CxtI) return; unsigned BitWidth = KnownZero.getBitWidth(); - for (auto *U : V->users()) { - auto *II = dyn_cast(U); - if (!II) + for (auto &AssumeVH : Q.AC->assumptions()) { + if (!AssumeVH) continue; - if (II->getIntrinsicID() != Intrinsic::assume) - continue; - if (Q.isExcluded(II)) + CallInst *I = cast(AssumeVH); + assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && + "Got assumption for the wrong function!"); + if (Q.isExcluded(I)) continue; - Value *Arg = II->getArgOperand(0); + // Warning: This loop can end up being somewhat performance sensetive. + // We're running this loop for once for each value queried resulting in a + // runtime of ~O(#assumes * #values). - if (Arg == V && isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && + "must be an assume intrinsic"); + + Value *Arg = I->getArgOperand(0); + + if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); KnownZero.clearAllBits(); KnownOne.setAllBits(); return; } - // Note that the patterns below need to be kept in sync with the code - // in InstCombiner::visitCallInst that adds relevant values to each - // assume's operand bundles. - // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth == MaxDepth) continue; @@ -551,20 +564,20 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. 
@@ -574,11 +587,11 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. @@ -588,11 +601,11 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. @@ -602,11 +615,11 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. @@ -616,11 +629,11 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. 
For those bits in B that are known to be one, @@ -633,11 +646,11 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are @@ -650,9 +663,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. KnownZero |= RHSKnownZero.lshr(C->getZExtValue()); @@ -661,9 +674,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. KnownZero |= RHSKnownOne.lshr(C->getZExtValue()); @@ -674,9 +687,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_AShr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. KnownZero |= RHSKnownZero << C->getZExtValue(); @@ -687,9 +700,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. 
KnownZero |= RHSKnownOne << C->getZExtValue(); @@ -697,9 +710,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownZero.isNegative()) { // We know that the sign bit is zero. @@ -708,9 +721,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { // We know that the sign bit is zero. @@ -719,9 +732,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownOne.isNegative()) { // We know that the sign bit is one. @@ -730,9 +743,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { // We know that the sign bit is one. @@ -741,9 +754,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. 
KnownZero |= @@ -751,13 +764,13 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(II, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, II)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). - if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, II))) + if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) KnownZero |= APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); else @@ -3118,7 +3131,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) - // TODO: Acquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and AssumptionCache and use them. if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) { V = Simplified; continue; @@ -3403,6 +3416,7 @@ bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { // Multiplying n * m significant bits yields a result of n + m significant @@ -3416,8 +3430,10 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, APInt LHSKnownOne(BitWidth, 0); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, CxtI, DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, CxtI, DT); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI, + DT); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI, + DT); // Note that underestimating the number of zero bits gives a more // conservative answer. unsigned ZeroBits = LHSKnownZero.countLeadingOnes() + @@ -3451,15 +3467,16 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { bool LHSKnownNonNegative, LHSKnownNegative; ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - CxtI, DT); + AC, CxtI, DT); if (LHSKnownNonNegative || LHSKnownNegative) { bool RHSKnownNonNegative, RHSKnownNegative; ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - CxtI, DT); + AC, CxtI, DT); if (LHSKnownNegative && RHSKnownNegative) { // The sign bit is set in both cases: this MUST overflow. 
@@ -3481,6 +3498,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { if (Add && Add->hasNoSignedWrap()) { @@ -3490,9 +3508,9 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, bool LHSKnownNonNegative, LHSKnownNegative; bool RHSKnownNonNegative, RHSKnownNegative; ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - CxtI, DT); + AC, CxtI, DT); ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - CxtI, DT); + AC, CxtI, DT); if ((LHSKnownNonNegative && RHSKnownNegative) || (LHSKnownNegative && RHSKnownNonNegative)) { @@ -3514,7 +3532,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { bool AddKnownNonNegative, AddKnownNegative; ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, - /*Depth=*/0, CxtI, DT); + /*Depth=*/0, AC, CxtI, DT); if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || (AddKnownNegative && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; @@ -3588,18 +3606,20 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, CxtI, DT); + Add, DL, AC, CxtI, DT); } OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, CxtI, DT); + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { @@ -4130,7 +4150,8 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const DataLayout &DL, unsigned Depth, - const Instruction *CxtI, const DominatorTree *DT) { + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -4168,7 +4189,7 @@ static bool isTruePredicate(CmpInst::Predicate Pred, match(B, m_Or(m_Specific(X), m_APInt(CB)))) { unsigned BitWidth = CA->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, CxtI, DT); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) return true; @@ -4193,24 +4214,25 @@ static Optional isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, const Value *ARHS, const Value *BLHS, const Value *BRHS, const DataLayout &DL, - unsigned Depth, const Instruction *CxtI, - const DominatorTree *DT) { + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { switch (Pred) { default: return None; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: - if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, CxtI, + if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, DT) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, 
BRHS, DL, Depth, CxtI, DT)) + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) return true; return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, CxtI, DT) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, CxtI, DT)) + if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) return true; return None; } @@ -4274,7 +4296,8 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool InvertAPred, - unsigned Depth, const Instruction *CxtI, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. if (LHS->getType() != RHS->getType()) @@ -4327,8 +4350,8 @@ Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, } if (APred == BPred) - return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, CxtI, - DT); + return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, + CxtI, DT); return None; } diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index ce77f06f135..ce523ae5793 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 1dbdab7ef67..a35e7e8caf1 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -94,6 +94,7 @@ CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) #endif FUNCTION_ANALYSIS("aa", AAManager()) +FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) @@ -169,6 +170,7 @@ FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) +FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp index 23a4a8098a5..de059b65663 100644 --- a/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/lib/Transforms/IPO/AlwaysInliner.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -89,6 +90,7 @@ public: char AlwaysInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline", "Inliner for always_inline functions", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 
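For orientation, a sketch (not part of the patch) of the legacy-pass-manager pattern that the inliner and argument-promotion hunks around this point reinstate: require AssumptionCacheTracker in getAnalysisUsage and hand callees a callback that fetches the per-function cache lazily. ExamplePass is a made-up pass; the tracker calls mirror the real hunks.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct ExamplePass : FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Counterpart of the INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
    // and addRequired lines being re-added throughout this patch.
    AU.addRequired<AssumptionCacheTracker>();
  }

  bool runOnFunction(Function &F) override {
    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    // Callees such as InlineFunctionInfo take a callback so they can look up
    // the cache for any function they touch, not just F.
    auto GetAssumptionCache = [ACT](Function &Fn) -> AssumptionCache & {
      return ACT->getAssumptionCache(Fn);
    };
    (void)GetAssumptionCache(F);
    return false;
  }
};
char ExamplePass::ID = 0;
} // end anonymous namespace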
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 01d28c7bcf4..65b7bad3b1e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -65,6 +66,7 @@ namespace { /// struct ArgPromotion : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); @@ -104,6 +106,7 @@ DoPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 12363c630d4..402a66552c2 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -1102,6 +1103,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -1111,6 +1113,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { char PostOrderFunctionAttrsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs", "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs", "Deduce function attributes", false, false) diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index a2911eed278..ef0465883f0 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -55,7 +56,11 @@ public: InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, Params, TTI, PSI); + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }; + return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, PSI); } bool runOnSCC(CallGraphSCC &SCC) override; @@ -71,6 
+76,7 @@ private: char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b1f5ec5dd81..cc420a95c58 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" @@ -84,6 +85,7 @@ Inliner::Inliner(char &ID, bool InsertLifetime) /// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); getAAResultsAnalysisUsage(AU); @@ -421,6 +423,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, + std::function GetAssumptionCache, ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, bool InsertLifetime, function_ref GetInlineCost, @@ -493,7 +496,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG); + InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -629,6 +632,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, bool Inliner::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); + ACT = &getAnalysis(); PSI = getAnalysis().getPSI(); auto &TLI = getAnalysis().getTLI(); // We compute dedicated AA results for each function in the SCC as needed. 
We @@ -641,7 +645,10 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); return *AAR; }; - return inlineCallsImpl(SCC, CG, PSI, TLI, InsertLifetime, + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }; + return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, [this](CallSite CS) { return getInlineCost(CS); }, AARGetter, ImportedFunctionsStats); } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index c408f227b29..7ef3fc1fc2a 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -46,11 +46,19 @@ struct PartialInlinerLegacyPass : public ModulePass { initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } bool runOnModule(Module &M) override { if (skipModule(M)) return false; - InlineFunctionInfo IFI(nullptr); + AssumptionCacheTracker *ACT = &getAnalysis(); + std::function GetAssumptionCache = + [&ACT](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }; + InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); return PartialInlinerImpl(IFI).run(M); } }; @@ -192,8 +200,11 @@ bool PartialInlinerImpl::run(Module &M) { } char PartialInlinerLegacyPass::ID = 0; -INITIALIZE_PASS(PartialInlinerLegacyPass, "partial-inliner", - "Partial Inliner", false, false) +INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", + "Partial Inliner", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", + "Partial Inliner", false, false) ModulePass *llvm::createPartialInliningPass() { return new PartialInlinerLegacyPass(); @@ -201,7 +212,12 @@ ModulePass *llvm::createPartialInliningPass() { PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { - InlineFunctionInfo IFI(nullptr); + auto &FAM = AM.getResult(M).getManager(); + std::function GetAssumptionCache = + [&FAM](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); if (PartialInlinerImpl(IFI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index c26a4fc8e13..6a43f8dbac4 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Constants.h" @@ -141,11 +142,13 @@ private: class SampleProfileLoader { public: SampleProfileLoader(StringRef Name = SampleProfileFile) - : DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), Samples(nullptr), - Filename(Name), ProfileIsValid(false), TotalCollectedSamples(0) {} + : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) {} bool doInitialization(Module &M); bool runOnModule(Module &M); + void setACT(AssumptionCacheTracker *A) { ACT = A; } void dump() { Reader->dump(); } @@ -204,6 +207,8 @@ protected: std::unique_ptr> PDT; std::unique_ptr LI; + AssumptionCacheTracker *ACT; + /// \brief Predecessors for each 
basic block in the CFG. BlockEdgeMap Predecessors; @@ -250,6 +255,10 @@ public: StringRef getPassName() const override { return "Sample profile pass"; } bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + private: SampleProfileLoader SampleLoader; }; @@ -615,6 +624,8 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { bool SampleProfileLoader::inlineHotFunctions(Function &F) { bool Changed = false; LLVMContext &Ctx = F.getContext(); + std::function GetAssumptionCache = [&]( + Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; while (true) { bool LocalChanged = false; SmallVector CIS; @@ -635,7 +646,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) { } } for (auto I : CIS) { - InlineFunctionInfo IFI(nullptr); + InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr); CallSite CS(I); Function *CalledFunction = CS.getCalledFunction(); if (!CalledFunction || !CalledFunction->getSubprogram()) @@ -1263,8 +1274,11 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } char SampleProfileLoaderLegacyPass::ID = 0; -INITIALIZE_PASS(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) +INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", + "Sample Profile loader", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", + "Sample Profile loader", false, false) bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); @@ -1307,6 +1321,8 @@ bool SampleProfileLoader::runOnModule(Module &M) { } bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { + // FIXME: pass in AssumptionCache correctly for the new pass manager. + SampleLoader.setACT(&getAnalysis()); return SampleLoader.runOnModule(M); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 12ceab3aa16..38a0b4a1e9a 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1035,7 +1035,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT)) + I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1154,7 +1154,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. 
- if (haveNoCommonBitsSet(LHS, RHS, DL, &I, &DT)) + if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) return BinaryOperator::CreateOr(LHS, RHS); if (Constant *CRHS = dyn_cast(RHS)) { @@ -1317,7 +1317,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = - SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT)) + SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (isa(RHS)) { @@ -1493,7 +1493,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT)) + I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1704,7 +1704,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = - SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT)) + SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index cbcd459f582..2ab76d6f8ea 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1355,7 +1355,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // (A|B)&(A|C) -> A|(B&C) etc @@ -1741,17 +1741,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *Mask = nullptr; Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, CxtI, + isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, CxtI, + isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, &DT)) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, CxtI, - &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, CxtI, - &DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, + CxtI, &DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, + CxtI, &DT)) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } @@ -2175,7 +2175,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // (A&B)|(A&C) -> A&(B|C) etc @@ -2533,7 +2533,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // (A&B)^(A&C) -> A&(B^C) etc diff --git 
a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cb741fba43b..4d432d56aa3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -109,8 +109,8 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) { } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -210,7 +210,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -1358,7 +1358,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, - &TLI, &DT)) + &TLI, &DT, &AC)) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1558,7 +1558,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -1575,7 +1575,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -1592,7 +1592,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getType()->getVectorNumElements()); @@ -1604,7 +1604,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvlfd: // Turn PPC QPX qvlfd -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, &DT) >= 32) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -1613,7 +1613,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfs: // Turn PPC QPX qvstfs -> store if the pointer is known aligned. 
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { Type *VTy = VectorType::get(Builder->getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); @@ -1625,7 +1625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfd: // Turn PPC QPX qvstfd -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC, &DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -2249,7 +2249,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { unsigned MemAlign = - getKnownAlignment(II->getArgOperand(0), DL, II, &DT); + getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT); unsigned AlignArg = II->getNumArgOperands() - 1; ConstantInt *IntrAlign = dyn_cast(II->getArgOperand(AlignArg)); if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { @@ -2528,78 +2528,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (KnownOne.isAllOnesValue()) return eraseInstFromFunction(*II); - // For assumptions, add to the associated operand bundle the values to which - // the assumption might apply. - // Note: This code must be kept in-sync with the code in - // computeKnownBitsFromAssume in ValueTracking. - SmallVector Affected; - auto AddAffected = [&Affected](Value *V) { - if (isa(V)) { - Affected.push_back(V); - } else if (auto *I = dyn_cast(V)) { - Affected.push_back(I); - - if (I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt) { - V = I->getOperand(0); - if (isa(V) || isa(V)) - Affected.push_back(V); - } - } - }; - - CmpInst::Predicate Pred; - if (match(IIOperand, m_ICmp(Pred, m_Value(A), m_Value(B)))) { - AddAffected(A); - AddAffected(B); - - if (Pred == ICmpInst::ICMP_EQ) { - // For equality comparisons, we handle the case of bit inversion. - auto AddAffectedFromEq = [&AddAffected](Value *V) { - Value *A; - if (match(V, m_Not(m_Value(A)))) { - AddAffected(A); - V = A; - } - - Value *B; - ConstantInt *C; - if (match(V, - m_CombineOr(m_And(m_Value(A), m_Value(B)), - m_CombineOr(m_Or(m_Value(A), m_Value(B)), - m_Xor(m_Value(A), m_Value(B)))))) { - AddAffected(A); - AddAffected(B); - } else if (match(V, - m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)), - m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)), - m_AShr(m_Value(A), - m_ConstantInt(C)))))) { - AddAffected(A); - } - }; - - AddAffectedFromEq(A); - AddAffectedFromEq(B); - } - } - - // If the list of affected values is the same as the existing list then - // there's nothing more to do here. 
- if (!Affected.empty()) - if (auto OB = CI.getOperandBundle("affected")) - if (Affected.size() == OB.getValue().Inputs.size() && - std::equal(Affected.begin(), Affected.end(), - OB.getValue().Inputs.begin())) - Affected.clear(); - - if (!Affected.empty()) { - Builder->CreateCall(AssumeIntrinsic, IIOperand, - OperandBundleDef("affected", Affected), - II->getName()); - return eraseInstFromFunction(*II); - } - break; } case Intrinsic::experimental_gc_relocate: { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9ffb47efbfb..6c4f4a1ce31 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4156,7 +4156,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } if (Value *V = - SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &I)) + SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) return replaceInstUsesWith(I, V); // comparing -val or val with non-zero is the same as just comparing val @@ -4321,7 +4321,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, DL, false, 0, &I, &DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); @@ -4644,7 +4644,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, - I.getFastMathFlags(), DL, &TLI, &DT, &I)) + I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) return replaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 24ba412ca99..c30598af423 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" @@ -178,6 +179,7 @@ private: AliasAnalysis *AA; // Required analyses. + AssumptionCache &AC; TargetLibraryInfo &TLI; DominatorTree &DT; const DataLayout &DL; @@ -191,17 +193,19 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, - TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout &DL, - LoopInfo *LI) + AssumptionCache &AC, TargetLibraryInfo &TLI, + DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - ExpensiveCombines(ExpensiveCombines), AA(AA), TLI(TLI), DT(DT), DL(DL), - LI(LI), MadeIRChange(false) {} + ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), + DL(DL), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// /// \returns true if the IR is changed. 
bool run(); + AssumptionCache &getAssumptionCache() const { return AC; } + const DataLayout &getDataLayout() const { return DL; } DominatorTree &getDominatorTree() const { return DT; } @@ -472,28 +476,30 @@ public: void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth, Instruction *CxtI) const { - return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, CxtI, &DT); + return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, + &DT); } bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0, Instruction *CxtI = nullptr) const { - return llvm::MaskedValueIsZero(V, Mask, DL, Depth, CxtI, &DT); + return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); } unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0, Instruction *CxtI = nullptr) const { - return llvm::ComputeNumSignBits(Op, DL, Depth, CxtI, &DT); + return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, unsigned Depth = 0, Instruction *CxtI = nullptr) const { - return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, CxtI, &DT); + return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, + &DT); } OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS, const Instruction *CxtI) { - return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, CxtI, &DT); + return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); } OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, const Instruction *CxtI) { - return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, CxtI, &DT); + return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); } private: diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 517c0d5b666..5276bee4e0a 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -286,7 +286,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { SmallVector ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( - Copy->getSource(), AI.getAlignment(), DL, &AI, &DT); + Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); @@ -826,7 +826,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &DT); + Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); @@ -1199,7 +1199,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &DT); + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? 
StoreAlign : DL.getABITypeAlignment(Val->getType()); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 8374437dc79..c70d7585cac 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -48,7 +48,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, BinaryOperator *I = dyn_cast(V); if (I && I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0, - &CxtI, &IC.getDominatorTree())) { + &IC.getAssumptionCache(), &CxtI, + &IC.getDominatorTree())) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) { @@ -178,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -544,7 +545,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { std::swap(Op0, Op1); if (Value *V = - SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT)) + SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -1060,7 +1061,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1133,7 +1134,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1196,7 +1197,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1248,7 +1249,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT)) + DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (isa(Op0)) @@ -1422,7 +1423,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1435,7 +1436,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { Constant *N1 
= Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); @@ -1465,7 +1466,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT)) + if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1541,7 +1542,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT)) + DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index a2f593ac41b..184897f751f 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT)) + if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) @@ -937,7 +937,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator(); Value *VA = PN.getIncomingValue(i); - if (isKnownNonZero(VA, DL, 0, CtxI, &DT)) { + if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { if (!NonZeroConst) NonZeroConst = GetAnyNonZeroConstInt(PN); PN.setIncomingValue(i, NonZeroConst); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 96a531bb254..36644845352 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1101,7 +1101,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Type *SelType = SI.getType(); if (Value *V = - SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT)) + SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index fa7a7c6bc73..bc38c4aca34 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -722,7 +722,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT)) + I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -763,7 +763,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), - DL, &TLI, &DT)) + DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -807,7 +807,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), 
I.isExact(), - DL, &TLI, &DT)) + DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index ee21d6c7e72..732a786ceb7 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -145,7 +145,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *V = SimplifyExtractElementInst( - EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT)) + EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index ab1869b69b9..0d29c1dceea 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -40,6 +40,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" @@ -682,14 +683,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), DL, &TLI, &DT)) + SI1->getFalseValue(), DL, &TLI, &DT, &AC)) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), DL, &TLI, &DT)) + SI1->getTrueValue(), DL, &TLI, &DT, &AC)) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1373,7 +1374,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); if (Value *V = - SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT)) + SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -2288,7 +2289,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return replaceInstUsesWith(EV, Agg); if (Value *V = - SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT)) + SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { @@ -3114,8 +3115,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, - AliasAnalysis *AA, TargetLibraryInfo &TLI, - DominatorTree &DT, + AliasAnalysis *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, DominatorTree &DT, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); @@ -3125,8 +3126,12 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, /// instructions into the worklist when they are created. 
IRBuilder Builder( F.getContext(), TargetFolder(DL), - IRBuilderCallbackInserter([&Worklist](Instruction *I) { + IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { Worklist.Add(I); + + using namespace llvm::PatternMatch; + if (match(I, m_Intrinsic())) + AC.registerAssumption(cast(I)); })); // Lower dbg.declare intrinsics otherwise their value may be clobbered @@ -3143,7 +3148,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, - AA, TLI, DT, DL, LI); + AA, AC, TLI, DT, DL, LI); Changed |= IC.run(); if (!Changed) @@ -3155,13 +3160,14 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, PreservedAnalyses InstCombinePass::run(Function &F, FunctionAnalysisManager &AM) { + auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); auto *LI = AM.getCachedResult(F); // FIXME: The AliasAnalysis is not yet supported in the new pass manager - if (!combineInstructionsOverFunction(F, Worklist, nullptr, TLI, DT, + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3176,6 +3182,7 @@ PreservedAnalyses InstCombinePass::run(Function &F, void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -3190,6 +3197,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { // Required analyses. auto AA = &getAnalysis().getAAResults(); + auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); @@ -3197,13 +3205,14 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? 
&LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, TLI, DT, + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 31503a00379..c1df3173c0f 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -53,6 +54,7 @@ struct AlignmentFromAssumptions : public FunctionPass { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -72,6 +74,7 @@ char AlignmentFromAssumptions::ID = 0; static const char aip_name[] = "Alignment from assumptions"; INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME, aip_name, false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME, @@ -405,13 +408,15 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto &AC = getAnalysis().getAssumptionCache(F); ScalarEvolution *SE = &getAnalysis().getSE(); DominatorTree *DT = &getAnalysis().getDomTree(); - return Impl.runImpl(F, SE, DT); + return Impl.runImpl(F, AC, SE, DT); } -bool AlignmentFromAssumptionsPass::runImpl(Function &F, ScalarEvolution *SE_, +bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC, + ScalarEvolution *SE_, DominatorTree *DT_) { SE = SE_; DT = DT_; @@ -420,12 +425,9 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, ScalarEvolution *SE_, NewSrcAlignments.clear(); bool Changed = false; - - for (auto &B : F) - for (auto &I : B) - if (auto *II = dyn_cast(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - Changed |= processAssumption(II); + for (auto &AssumeVH : AC.assumptions()) + if (AssumeVH) + Changed |= processAssumption(cast(AssumeVH)); return Changed; } @@ -433,9 +435,10 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, ScalarEvolution *SE_, PreservedAnalyses AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) { + AssumptionCache &AC = AM.getResult(F); ScalarEvolution &SE = AM.getResult(F); DominatorTree &DT = AM.getResult(F); - bool Changed = runImpl(F, &SE, &DT); + bool Changed = runImpl(F, AC, &SE, &DT); // FIXME: We need to invalidate this to avoid PR28400. Is there a better // solution? 
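For context on the consumer pattern the preceding AlignmentFromAssumptions hunks restore: a pass that wants the cached `llvm.assume` calls re-declares a dependency on `AssumptionCacheTracker`, fetches the per-function `AssumptionCache`, and walks `AC.assumptions()` instead of scanning every instruction in the function. The sketch below is illustrative only and not part of this patch; the pass name `AssumeCounter` and its body are hypothetical, while the `AssumptionCacheTracker` / `assumptions()` calls are the same legacy-pass-manager APIs the hunks above use.

```cpp
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Hypothetical consumer of AssumptionCache, shown only to illustrate the
// pattern restored by this revert (not code from the patch).
struct AssumeCounter : public FunctionPass {
  static char ID;
  AssumeCounter() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Same dependency the hunks above re-add to each pass.
    AU.addRequired<AssumptionCacheTracker>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    unsigned NumAssumes = 0;
    // Iterate the cached assumption calls, skipping entries whose value
    // handle has gone null -- the same loop shape as runImpl above.
    for (auto &AssumeVH : AC.assumptions())
      if (AssumeVH)
        ++NumAssumes;
    (void)NumAssumes;
    return false; // Read-only; the IR is not modified.
  }
};
} // end anonymous namespace

char AssumeCounter::ID = 0;
// Minimal legacy registration; a real in-tree pass would use the
// INITIALIZE_PASS_* macros as the surrounding diffs do.
static RegisterPass<AssumeCounter> X("assume-counter",
                                     "Count cached llvm.assume calls",
                                     false, false);
```
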
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 3faa2675816..9bf638dcbae 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -250,6 +251,7 @@ public: const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; + AssumptionCache &AC; MemorySSA *MSSA; typedef RecyclingAllocator< BumpPtrAllocator, ScopedHashTableVal> AllocatorTy; @@ -312,8 +314,8 @@ public: /// \brief Set up the EarlyCSE runner for a particular function. EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), MSSA(MSSA), CurrentGeneration(0) {} + DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {} bool run(); @@ -670,7 +672,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT)) { + if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -956,10 +958,11 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto &TLI = AM.getResult(F); auto &TTI = AM.getResult(F); auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); auto *MSSA = UseMemorySSA ? &AM.getResult(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, MSSA); + EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1001,15 +1004,17 @@ public: auto &TLI = getAnalysis().getTLI(); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); + auto &AC = getAnalysis().getAssumptionCache(F); auto *MSSA = UseMemorySSA ? 
&getAnalysis().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, MSSA); + EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); return CSE.run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1031,6 +1036,7 @@ char EarlyCSELegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) @@ -1051,6 +1057,7 @@ FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) { INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa", "Early CSE w/ MemorySSA", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a25909b1042..9485bfd7c29 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -581,13 +582,14 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { // significant! Re-ordering these variables will cause GVN when run alone to // be less effective! We should fix memdep and basic-aa to not exhibit this // behavior, but until then don't change the order here. + auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &MemDep = AM.getResult(F); auto *LI = AM.getCachedResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, DT, TLI, AA, &MemDep, LI, &ORE); + bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -1532,7 +1534,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getPointerOperand(), DL); + PHITransAddr Address(LI->getPointerOperand(), DL, AC); Value *LoadPtr = nullptr; LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); @@ -2101,7 +2103,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); @@ -2230,10 +2232,11 @@ bool GVN::processInstruction(Instruction *I) { } /// runOnFunction - This is the main transformation entry point for a function. 
-bool GVN::runImpl(Function &F, DominatorTree &RunDT, +bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, OptimizationRemarkEmitter *RunORE) { + AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); TLI = &RunTLI; @@ -2750,7 +2753,8 @@ public: auto *LIWP = getAnalysisIfAvailable(); return Impl.runImpl( - F, getAnalysis().getDomTree(), + F, getAnalysis().getAssumptionCache(F), + getAnalysis().getDomTree(), getAnalysis().getTLI(), getAnalysis().getAAResults(), NoLoads ? nullptr @@ -2760,6 +2764,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addRequired(); if (!NoLoads) @@ -2784,6 +2789,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { } INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index cedd6d7263f..8e81541c233 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1336,7 +1336,7 @@ bool LoopConstrainer::run() { auto *L = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, true); + simplifyLoop(L, &DT, &LI, &SE, nullptr, true); // Pre loops are slow paths, we do not need to perform any loop // optimizations on them. DisableAllLoopOptsOnLoop(*L); @@ -1346,14 +1346,14 @@ bool LoopConstrainer::run() { auto *L = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, true); + simplifyLoop(L, &DT, &LI, &SE, nullptr, true); // Post loops are slow paths, we do not need to perform any loop // optimizations on them. DisableAllLoopOptsOnLoop(*L); } formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, true); + simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); return true; } diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 526a4132582..d09af32a99f 100644 --- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "loop-data-prefetch" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -65,10 +66,10 @@ namespace { /// Loop prefetch implementation class. 
class LoopDataPrefetch { public: - LoopDataPrefetch(LoopInfo *LI, ScalarEvolution *SE, + LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) - : LI(LI), SE(SE), TTI(TTI), ORE(ORE) {} + : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {} bool run(); @@ -97,6 +98,7 @@ private: return TTI->getMaxPrefetchIterationsAhead(); } + AssumptionCache *AC; LoopInfo *LI; ScalarEvolution *SE; const TargetTransformInfo *TTI; @@ -112,6 +114,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); @@ -130,6 +133,7 @@ public: char LoopDataPrefetchLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) @@ -161,11 +165,12 @@ PreservedAnalyses LoopDataPrefetchPass::run(Function &F, FunctionAnalysisManager &AM) { LoopInfo *LI = &AM.getResult(F); ScalarEvolution *SE = &AM.getResult(F); + AssumptionCache *AC = &AM.getResult(F); OptimizationRemarkEmitter *ORE = &AM.getResult(F); const TargetTransformInfo *TTI = &AM.getResult(F); - LoopDataPrefetch LDP(LI, SE, TTI, ORE); + LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE); bool Changed = LDP.run(); if (Changed) { @@ -184,12 +189,14 @@ bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) { LoopInfo *LI = &getAnalysis().getLoopInfo(); ScalarEvolution *SE = &getAnalysis().getSE(); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = &getAnalysis().getORE(); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); - LoopDataPrefetch LDP(LI, SE, TTI, ORE); + LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE); return LDP.run(); } @@ -218,7 +225,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { return MadeChange; SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); // Calculate the number of iterations ahead to prefetch CodeMetrics Metrics; diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index f9bbd820fca..f6620ad1ade 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -34,6 +35,7 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions simplified"); static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, + AssumptionCache *AC, const TargetLibraryInfo *TLI) { SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -75,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT); + Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. 
for (User *U : I->users()) @@ -163,13 +165,17 @@ public: getAnalysisIfAvailable(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis().getLoopInfo(); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); const TargetLibraryInfo *TLI = &getAnalysis().getTLI(); - return SimplifyLoopInst(L, DT, LI, TLI); + return SimplifyLoopInst(L, DT, LI, AC, TLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); getLoopAnalysisUsage(AU); @@ -186,10 +192,11 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, // Use getCachedResult because Loop pass cannot trigger a function analysis. auto *DT = FAM.getCachedResult(*F); auto *LI = FAM.getCachedResult(*F); + auto *AC = FAM.getCachedResult(*F); const auto *TLI = FAM.getCachedResult(*F); - assert((LI && TLI) && "Analyses for Loop Inst Simplify not available"); + assert((LI && AC && TLI) && "Analyses for Loop Inst Simplify not available"); - if (!SimplifyLoopInst(&L, DT, LI, TLI)) + if (!SimplifyLoopInst(&L, DT, LI, AC, TLI)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); @@ -198,6 +205,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, char LoopInstSimplifyLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify", "Simplify instructions in loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify", diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index e9c1f2ed05f..e9f84edd1cb 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DependenceAnalysis.h" diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 270998701a2..32d2caa78da 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -54,14 +55,15 @@ class LoopRotate { const unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; + AssumptionCache *AC; DominatorTree *DT; ScalarEvolution *SE; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, - const TargetTransformInfo *TTI, DominatorTree *DT, - ScalarEvolution *SE) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), DT(DT), SE(SE) { + const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) { } bool processLoop(Loop *L); @@ -214,7 +216,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // duplicate blocks inside it. 
{ SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); @@ -307,7 +309,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide TLI, and DT to SimplifyInstruction. + // FIXME: Provide TLI, DT, AC to SimplifyInstruction. Value *V = SimplifyInstruction(C, DL); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value @@ -324,6 +326,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Otherwise, stick the new instruction into the new block! C->setName(Inst->getName()); C->insertBefore(LoopEntryBranch); + + if (auto *II = dyn_cast(C)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); } } @@ -624,12 +630,13 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) { auto *LI = FAM.getCachedResult(*F); const auto *TTI = FAM.getCachedResult(*F); - assert((LI && TTI) && "Analyses for loop rotation not available"); + auto *AC = FAM.getCachedResult(*F); + assert((LI && TTI && AC) && "Analyses for loop rotation not available"); // Optional analyses. auto *DT = FAM.getCachedResult(*F); auto *SE = FAM.getCachedResult(*F); - LoopRotate LR(DefaultRotationThreshold, LI, TTI, DT, SE); + LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE); bool Changed = LR.processLoop(&L); if (!Changed) @@ -654,6 +661,7 @@ public: // LCSSA form makes instruction renaming easier. void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -665,11 +673,12 @@ public: auto *LI = &getAnalysis().getLoopInfo(); const auto *TTI = &getAnalysis().getTTI(F); + auto *AC = &getAnalysis().getAssumptionCache(F); auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable(); auto *SE = SEWP ? 
&SEWP->getSE() : nullptr; - LoopRotate LR(MaxHeaderSize, LI, TTI, DT, SE); + LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE); return LR.processLoop(L); } }; @@ -678,6 +687,7 @@ public: char LoopRotateLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index f87f6dd7632..d37339fc5fe 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 6a16bdaef24..48ec4386662 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -555,9 +556,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, - unsigned BEInsns) { + AssumptionCache *AC, unsigned BEInsns) { SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; for (BasicBlock *BB : L->blocks()) @@ -955,7 +956,7 @@ static bool computeUnrollCount( static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE, const TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, + AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, Optional ProvidedCount, Optional ProvidedThreshold, @@ -982,7 +983,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return false; unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, UP.BEInsns); + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" @@ -1058,7 +1059,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // Unroll the loop. if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, - TripMultiple, UP.PeelCount, LI, SE, &DT, &ORE, + TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE, PreserveLCSSA)) return false; @@ -1103,13 +1104,14 @@ public: ScalarEvolution *SE = &getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); + auto &AC = getAnalysis().getAssumptionCache(F); // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. 
Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(&F); bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - return tryToUnrollLoop(L, DT, LI, SE, TTI, ORE, PreserveLCSSA, + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); @@ -1119,6 +1121,7 @@ public: /// loop preheaders be inserted into the CFG... /// void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); // FIXME: Loop passes are required to preserve domtree, and for now we just // recreate dom info if anything gets unrolled. @@ -1129,6 +1132,7 @@ public: char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) @@ -1160,6 +1164,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) { LoopInfo *LI = FAM.getCachedResult(*F); ScalarEvolution *SE = FAM.getCachedResult(*F); auto *TTI = FAM.getCachedResult(*F); + auto *AC = FAM.getCachedResult(*F); auto *ORE = FAM.getCachedResult(*F); if (!DT) report_fatal_error( @@ -1173,12 +1178,15 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) { if (!TTI) report_fatal_error( "LoopUnrollPass: TargetIRAnalysis not cached at a higher level"); + if (!AC) + report_fatal_error( + "LoopUnrollPass: AssumptionAnalysis not cached at a higher level"); if (!ORE) report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); bool Changed = - tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *ORE, /*PreserveLCSSA*/ true, + tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 741785f32c4..6f7682c96ce 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -28,10 +28,10 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -138,7 +138,8 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. - bool countLoop(const Loop *L, const TargetTransformInfo &TTI); + bool countLoop(const Loop *L, const TargetTransformInfo &TTI, + AssumptionCache *AC); // Clean all data related to given loop. void forgetLoop(const Loop *L); @@ -165,6 +166,7 @@ namespace { class LoopUnswitch : public LoopPass { LoopInfo *LI; // Loop information LPPassManager *LPM; + AssumptionCache *AC; // Used to check if second loop needs processing after // RewriteLoopBodyWithConditionConstant rewrites first loop. @@ -213,6 +215,7 @@ namespace { /// loop preheaders be inserted into the CFG. 
/// void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -257,7 +260,8 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. -bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) { +bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, + AssumptionCache *AC) { LoopPropsMapIt PropsIt; bool Inserted; @@ -275,7 +279,7 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) { // This is a very ad-hoc heuristic. SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can @@ -374,6 +378,7 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, char LoopUnswitch::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", @@ -440,6 +445,8 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { if (skipLoop(L)) return false; + AC = &getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); LI = &getAnalysis().getLoopInfo(); LPM = &LPM_Ref; DT = &getAnalysis().getDomTree(); @@ -528,7 +535,8 @@ bool LoopUnswitch::processCurrentLoop() { // Analyze loop cost, and stop unswitching if loop content can not be duplicated. if (!BranchesInfo.countLoop( currentLoop, getAnalysis().getTTI( - *currentLoop->getHeader()->getParent()))) + *currentLoop->getHeader()->getParent()), + AC)) return false; // Try trivial unswitch first before loop over other basic blocks in the loop. @@ -1120,10 +1128,15 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, } // Rewrite the code to refer to itself. - for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) - for (Instruction &I : *NewBlocks[i]) + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { + for (Instruction &I : *NewBlocks[i]) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + } + } // Rewrite the original preheader to select between versions of the loop. 
BranchInst *OldBR = cast(loopPreheader->getTerminator()); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 1d8ab1be04b..b19e663cc05 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -313,6 +313,7 @@ namespace { // This transformation requires dominator postdominator info void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -346,6 +347,7 @@ FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); } INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -1291,10 +1293,11 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. + AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); if (MDep->getAlignment() < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, - CS.getInstruction(), &DT) < ByValAlign) + CS.getInstruction(), &AC, &DT) < ByValAlign) return false; // Verify that the copied-from memory doesn't change in between the memcpy and @@ -1373,11 +1376,15 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { auto LookupAliasAnalysis = [&]() -> AliasAnalysis & { return AM.getResult(F); }; + auto LookupAssumptionCache = [&]() -> AssumptionCache & { + return AM.getResult(F); + }; auto LookupDomTree = [&]() -> DominatorTree & { return AM.getResult(F); }; - bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis, LookupDomTree); + bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis, + LookupAssumptionCache, LookupDomTree); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -1389,11 +1396,13 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { bool MemCpyOptPass::runImpl( Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, std::function LookupAliasAnalysis_, + std::function LookupAssumptionCache_, std::function LookupDomTree_) { bool MadeChange = false; MD = MD_; TLI = TLI_; LookupAliasAnalysis = std::move(LookupAliasAnalysis_); + LookupAssumptionCache = std::move(LookupAssumptionCache_); LookupDomTree = std::move(LookupDomTree_); // If we don't have at least memset and memcpy, there is little point of doing @@ -1423,9 +1432,13 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { auto LookupAliasAnalysis = [this]() -> AliasAnalysis & { return getAnalysis().getAAResults(); }; + auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & { + return getAnalysis().getAssumptionCache(F); + }; auto LookupDomTree = [this]() -> DominatorTree & { return getAnalysis().getDomTree(); }; - return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupDomTree); + return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache, + LookupDomTree); } diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 1649c2e5ab7..0a3bf7b4c31 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp 
@@ -107,6 +107,7 @@ public: AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -122,6 +123,7 @@ private: char NaryReassociateLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate", "Nary reassociation", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -137,22 +139,24 @@ bool NaryReassociateLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); auto *SE = &getAnalysis().getSE(); auto *TLI = &getAnalysis().getTLI(); auto *TTI = &getAnalysis().getTTI(F); - return Impl.runImpl(F, DT, SE, TLI, TTI); + return Impl.runImpl(F, AC, DT, SE, TLI, TTI); } PreservedAnalyses NaryReassociatePass::run(Function &F, FunctionAnalysisManager &AM) { + auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); auto *SE = &AM.getResult(F); auto *TLI = &AM.getResult(F); auto *TTI = &AM.getResult(F); - bool Changed = runImpl(F, DT, SE, TLI, TTI); + bool Changed = runImpl(F, AC, DT, SE, TLI, TTI); // FIXME: We need to invalidate this to avoid PR28400. Is there a better // solution? @@ -169,10 +173,11 @@ PreservedAnalyses NaryReassociatePass::run(Function &F, return PA; } -bool NaryReassociatePass::runImpl(Function &F, DominatorTree *DT_, - ScalarEvolution *SE_, +bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_, + DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_) { + AC = AC_; DT = DT_; SE = SE_; TLI = TLI_; @@ -302,7 +307,7 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, IndexToSplit = SExt->getOperand(0); } else if (ZExtInst *ZExt = dyn_cast(IndexToSplit)) { // zext can be treated as sext if the source is non-negative. - if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, GEP, DT)) + if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT)) IndexToSplit = ZExt->getOperand(0); } @@ -311,7 +316,7 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, // nsw, we cannot split the add because // sext(LHS + RHS) != sext(LHS) + sext(RHS). if (requiresSignExtension(IndexToSplit, GEP) && - computeOverflowForSignedAdd(AO, *DL, GEP, DT) != + computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) != OverflowResult::NeverOverflows) return nullptr; @@ -340,7 +345,7 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, IndexExprs.push_back(SE->getSCEV(*Index)); // Replace the I-th index with LHS. IndexExprs[I] = SE->getSCEV(LHS); - if (isKnownNonNegative(LHS, *DL, 0, GEP, DT) && + if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) && DL->getTypeSizeInBits(LHS->getType()) < DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) { // Zero-extend LHS if it is non-negative. 
InstCombine canonicalizes sext to diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 12481de54b0..1de742050cb 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/BasicBlock.h" diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 9021bcf20b1..887818bfdde 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" @@ -4183,15 +4184,17 @@ bool SROA::promoteAllocas(Function &F) { NumPromoted += PromotableAllocas.size(); DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, nullptr); + PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); PromotableAllocas.clear(); return true; } -PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT) { +PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DT = &RunDT; + AC = &RunAC; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); @@ -4239,7 +4242,8 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT) { } PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { - return runImpl(F, AM.getResult(F)); + return runImpl(F, AM.getResult(F), + AM.getResult(F)); } /// A legacy pass for the legacy pass manager that wraps the \c SROA pass. @@ -4259,10 +4263,12 @@ public: return false; auto PA = Impl.runImpl( - F, getAnalysis().getDomTree()); + F, getAnalysis().getDomTree(), + getAnalysis().getAssumptionCache(F)); return !PA.areAllPreserved(); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.setPreservesCFG(); @@ -4278,6 +4284,7 @@ FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); } INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 999860d75f4..4d594532c36 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -459,7 +459,7 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS). 
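The SROA hunks above give the pass an AssumptionCache member again and thread it into mem2reg promotion, so that the PHI simplification done during promotion can consult dominating assumes. A minimal sketch of the call site, assuming the member names shown in the hunks:

  bool SROA::promoteAllocas(Function &F) {
    if (PromotableAllocas.empty())
      return false;
    NumPromoted += PromotableAllocas.size();
    // AC was stashed by runImpl(); mem2reg uses it when folding the PHIs it
    // inserts for the promoted allocas.
    PromoteMemToReg(PromotableAllocas, *DT, /*AliasSetTracker=*/nullptr, AC);
    PromotableAllocas.clear();
    return true;
  }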
if (BO->getOpcode() == Instruction::Or && - !haveNoCommonBitsSet(LHS, RHS, DL, BO, DT)) + !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT)) return false; // In addition, tracing into BO requires that its surrounding s/zext (if diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 490171abf2c..f2723bd7af8 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -128,6 +129,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, + AssumptionCache *AC, unsigned BonusInstThreshold) { bool Changed = false; bool LocalChange = true; @@ -143,7 +145,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, &LoopHeaders)) { + if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) { LocalChange = true; ++NumSimpl; } @@ -154,10 +156,10 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, } static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, - int BonusInstThreshold) { + AssumptionCache *AC, int BonusInstThreshold) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, BonusInstThreshold); + EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); // If neither pass changed anything, we're done. 
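With the operand bundles gone, the per-block SimplifyCFG entry point must be handed the cache explicitly, so iterativelySimplifyCFG forwards the AssumptionCache it receives from the pass into every block-level call (and, through that parameter, into the recursive SimplifyCFG invocations further down in this patch). A rough sketch of the driver loop, with names from the hunks and the loop-header bookkeeping elided:

  static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
                                     AssumptionCache *AC,
                                     unsigned BonusInstThreshold) {
    bool Changed = false;
    bool LocalChange = true;
    SmallPtrSet<BasicBlock *, 16> LoopHeaders; // the real code seeds this first
    while (LocalChange) {
      LocalChange = false;
      // Sweep all blocks; each successful simplification may expose more work.
      for (Function::iterator BBIt = F.begin(); BBIt != F.end();)
        if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders))
          LocalChange = true;
      Changed |= LocalChange;
    }
    return Changed;
  }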
if (!EverChanged) return false; @@ -171,7 +173,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, BonusInstThreshold); + EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); @@ -187,8 +189,9 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold) PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); + auto &AC = AM.getResult(F); - if (!simplifyFunctionCFG(F, TTI, BonusInstThreshold)) + if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); @@ -211,12 +214,15 @@ struct CFGSimplifyPass : public FunctionPass { if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F))) return false; + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); - return simplifyFunctionCFG(F, TTI, BonusInstThreshold); + return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -227,6 +233,7 @@ char CFGSimplifyPass::ID = 0; INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 2c0597ef981..ee083f91c7a 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" @@ -1094,8 +1095,11 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions) + if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) return; + AssumptionCache *AC = IFI.GetAssumptionCache + ? &(*IFI.GetAssumptionCache)(*CS.getCaller()) + : nullptr; auto &DL = CS.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions @@ -1118,11 +1122,13 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. 
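The InlineFunction hunks restore the other half of the contract: when inlining materializes a new @llvm.assume in the caller (here an alignment assumption for an align parameter), the new call must also be registered with the caller's AssumptionCache, otherwise later cache-based queries would never see it. A sketch of that pattern, assuming IFI.GetAssumptionCache is populated as in the hunks:

  AssumptionCache *AC = IFI.GetAssumptionCache
                            ? &(*IFI.GetAssumptionCache)(*CS.getCaller())
                            : nullptr;
  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();

  // Only emit the assume if the alignment is not already provable here.
  if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) < Align) {
    CallInst *NewAssumption = IRBuilder<>(CS.getInstruction())
                                  .CreateAlignmentAssumption(DL, Arg, Align);
    if (AC)
      AC->registerAssumption(NewAssumption);
  }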
Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, DL, CS.getInstruction(), &DT) >= Align) + if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align) continue; - IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, Arg, Align); + CallInst *NewAssumption = IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); + if (AC) + AC->registerAssumption(NewAssumption); } } } @@ -1233,11 +1239,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. return Arg; + AssumptionCache *AC = + IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; const DataLayout &DL = Caller->getParent()->getDataLayout(); // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall) >= + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= ByValAlignment) return Arg; @@ -1653,6 +1661,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Propagate llvm.mem.parallel_loop_access if necessary. PropagateParallelLoopAccessMetadata(CS, VMap); + + // Register any cloned assumptions. + if (IFI.GetAssumptionCache) + for (BasicBlock &NewBlock : + make_range(FirstNewBlock->getIterator(), Caller->end())) + for (Instruction &I : NewBlock) { + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); + } } // If there are any alloca instructions in the block that used to be the entry @@ -2173,8 +2191,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { + AssumptionCache *AC = + IFI.GetAssumptionCache ? 
&(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr)) { + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 22029f646b2..6de0f34e94c 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1019,13 +1019,14 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI, + AssumptionCache *AC, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, CxtI, DT); + computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 176de0cf153..00cda2af00c 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -46,6 +46,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -203,12 +204,13 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. -static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT) { +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -246,7 +248,8 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT) { /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, bool PreserveLCSSA) { + ScalarEvolution *SE, bool PreserveLCSSA, + AssumptionCache *AC) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; @@ -255,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, BasicBlock *Header = L->getHeader(); assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - PHINode *PN = findPHIToPartitionLoops(L, DT); + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -498,7 +501,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// \brief Simplify one loop and queue further loops for simplification. 
static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, bool PreserveLCSSA) { + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; ReprocessLoop: @@ -592,7 +596,7 @@ ReprocessLoop: // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = - separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA)) { + separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -624,7 +628,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); @@ -727,7 +731,8 @@ ReprocessLoop: } bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, bool PreserveLCSSA) { + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -744,7 +749,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, - PreserveLCSSA); + AC, PreserveLCSSA); return Changed; } @@ -759,6 +764,8 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + // We need loop information to identify the loops... AU.addRequired(); AU.addPreserved(); @@ -784,6 +791,7 @@ namespace { char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", @@ -802,6 +810,8 @@ bool LoopSimplify::runOnFunction(Function &F) { DominatorTree *DT = &getAnalysis().getDomTree(); auto *SEWP = getAnalysisIfAvailable(); ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); #ifndef NDEBUG @@ -816,7 +826,7 @@ bool LoopSimplify::runOnFunction(Function &F) { // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, PreserveLCSSA); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); #ifndef NDEBUG if (PreserveLCSSA) { @@ -834,11 +844,12 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, LoopInfo *LI = &AM.getResult(F); DominatorTree *DT = &AM.getResult(F); ScalarEvolution *SE = AM.getCachedResult(F); + AssumptionCache *AC = &AM.getResult(F); // FIXME: This pass should verify that the loops on which it's operating // are in canonical SSA form, and that the pass itself preserves this form. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, true /* PreserveLCSSA */); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */); // FIXME: We need to invalidate this to avoid PR28400. Is there a better // solution? 
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6cea53e1b4e..fb74505518e 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -213,7 +214,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, + bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { @@ -510,9 +512,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } // Remap all instructions in the most recent iteration - for (BasicBlock *NewBlock : NewBlocks) - for (Instruction &I : *NewBlock) + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { ::remapInstruction(&I, LastValueMap); + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + } + } } // Loop over the PHI nodes in the original block, setting incoming values. @@ -698,7 +705,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // loops too). // TODO: That potentially might be compile-time expensive. We should try // to fix the loop-simplified form incrementally. - simplifyLoop(OuterL, DT, LI, SE, PreserveLCSSA); + simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -716,7 +723,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } else { // Simplify loops for which we might've broken loop-simplify form. 
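Unrolling clones whole loop iterations, so any @llvm.assume in the body gets duplicated; the restored remap loop above therefore watches for cloned assume intrinsics and registers each one, keeping the cache in sync with the copies. Trimmed to the relevant step:

  for (BasicBlock *NewBlock : NewBlocks) {
    for (Instruction &I : *NewBlock) {
      ::remapInstruction(&I, LastValueMap);
      // A cloned assumption is a brand-new call; tell the cache about it.
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
        if (II->getIntrinsicID() == Intrinsic::assume)
          AC->registerAssumption(II);
    }
  }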
for (Loop *SubLoop : LoopsToSimplify) - simplifyLoop(SubLoop, DT, LI, SE, PreserveLCSSA); + simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); } } diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 617ad66d37d..24b3b12930a 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -26,7 +27,8 @@ using namespace llvm; STATISTIC(NumPromoted, "Number of alloca's promoted"); -static bool promoteMemoryToRegister(Function &F, DominatorTree &DT) { +static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, + AssumptionCache &AC) { std::vector Allocas; BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; @@ -44,7 +46,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, nullptr); + PromoteMemToReg(Allocas, DT, nullptr, &AC); NumPromoted += Allocas.size(); Changed = true; } @@ -53,7 +55,8 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT) { PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); - if (!promoteMemoryToRegister(F, DT)) + auto &AC = AM.getResult(F); + if (!promoteMemoryToRegister(F, DT, AC)) return PreservedAnalyses::all(); // FIXME: This should also 'preserve the CFG'. @@ -75,10 +78,13 @@ struct PromoteLegacyPass : public FunctionPass { return false; DominatorTree &DT = getAnalysis().getDomTree(); - return promoteMemoryToRegister(F, DT); + AssumptionCache &AC = + getAnalysis().getAssumptionCache(F); + return promoteMemoryToRegister(F, DT, AC); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); } @@ -89,6 +95,7 @@ char PromoteLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " "Register", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 96c3e7c6451..35faa6f65ef 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -228,6 +228,9 @@ struct PromoteMem2Reg { /// An AliasSetTracker object to update. If null, don't update it. AliasSetTracker *AST; + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. + AssumptionCache *AC; + /// Reverse mapping of Allocas. DenseMap AllocaLookup; @@ -266,10 +269,10 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, - AliasSetTracker *AST) + AliasSetTracker *AST, AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AST(AST) {} + AST(AST), AC(AC) {} void run(); @@ -690,7 +693,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. 
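Inside mem2reg the cache is mostly a pass-through: PromoteMem2Reg keeps the AssumptionCache it was given and supplies it to SimplifyInstruction when folding the single-value PHIs it inserted, so that folding can draw on dominating assumes. Roughly:

  PHINode *PN = I->second;
  // If the inserted PHI merges one value (plus undefs), fold it away.
  if (Value *V = SimplifyInstruction(PN, DL, /*TLI=*/nullptr, &DT, AC)) {
    if (AST && PN->getType()->isPointerTy())
      AST->deleteValue(PN);
    PN->replaceAllUsesWith(V);
    PN->eraseFromParent();
  }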
- if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -984,10 +987,10 @@ NextIteration: } void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - AliasSetTracker *AST) { + AliasSetTracker *AST, AssumptionCache *AC) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, AST).run(); + PromoteMem2Reg(Allocas, DT, AST, AC).run(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 668e22f46f4..3846b21c502 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -167,6 +167,7 @@ class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout &DL; unsigned BonusInstThreshold; + AssumptionCache *AC; SmallPtrSetImpl *LoopHeaders; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases( @@ -190,9 +191,9 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, + unsigned BonusInstThreshold, AssumptionCache *AC, SmallPtrSetImpl *LoopHeaders) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), LoopHeaders(LoopHeaders) {} bool run(BasicBlock *BB); @@ -3479,7 +3480,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, - const TargetTransformInfo &TTI, unsigned BonusInstThreshold) { + const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + AssumptionCache *AC) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -3514,7 +3516,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -3530,7 +3532,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -4327,16 +4329,17 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout &DL) { +static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, SI); + computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) // bits are in the condition value. 
- unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, SI) - 1; + unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; // Gather dead cases. @@ -4756,7 +4759,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, + AssumptionCache *AC, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -5503,12 +5506,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -5518,28 +5521,28 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Remove unreachable cases. 
- if (EliminateDeadSwitchCases(SI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + if (EliminateDeadSwitchCases(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToSelect(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + if (SwitchToSelect(SI, Builder, AC, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (SwitchToLookupTable(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -5577,7 +5580,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } return Changed; } @@ -5686,7 +5689,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, ; if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, - BonusInstThreshold)) + BonusInstThreshold, AC)) return true; } @@ -5704,7 +5707,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -5729,7 +5732,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -5739,14 +5742,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else if (&*I == cast(BI->getCondition())) { ++I; // Ignore dbg intrinsics. while (isa(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } @@ -5773,7 +5776,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { : ConstantInt::getFalse(BB->getContext()); BI->setCondition(CI); RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } } @@ -5782,7 +5785,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
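EliminateDeadSwitchCases, shown in the hunks above, illustrates why the cache is threaded this deep: known-bits and sign-bit facts derived from dominating assumes can prove some case values unreachable, and SimplifyCFG then drops those edges. A sketch of the pruning test, written against the pre-4.0 KnownZero/KnownOne interface this patch uses; the removal bookkeeping is omitted:

  Value *Cond = SI->getCondition();
  unsigned Bits = Cond->getType()->getIntegerBitWidth();
  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
  computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
  unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
  unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;

  for (auto &Case : SI->cases()) {
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it sets a bit known to be zero, misses a bit known to
    // be one, or needs more significant bits than the condition can carry.
    if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
        CaseVal.getMinSignedBits() > MaxSignificantBitsInCond) {
      // ... record this case for removal ...
    }
  }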
if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -5791,7 +5794,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -5799,7 +5802,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -5808,7 +5811,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // If this is a branch on a phi node in the current block, thread control @@ -5816,14 +5819,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Look for diamond patterns. if (MergeCondStores) @@ -5831,7 +5834,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) if (mergeConditionalStores(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -5993,9 +5996,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. 
/// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, + unsigned BonusInstThreshold, AssumptionCache *AC, SmallPtrSetImpl *LoopHeaders) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, LoopHeaders) + BonusInstThreshold, AC, LoopHeaders) .run(BB); } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 985a8bccbd3..1220490123c 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" @@ -34,7 +35,7 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions removed"); static bool runImpl(Function &F, const DominatorTree *DT, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, AssumptionCache *AC) { const DataLayout &DL = F.getParent()->getDataLayout(); SmallPtrSet S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -53,7 +54,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast(U)); @@ -92,6 +93,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -104,7 +106,9 @@ namespace { &getAnalysis().getDomTree(); const TargetLibraryInfo *TLI = &getAnalysis().getTLI(); - return runImpl(F, DT, TLI); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); + return runImpl(F, DT, TLI, AC); } }; } @@ -112,6 +116,7 @@ namespace { char InstSimplifier::ID = 0; INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(InstSimplifier, "instsimplify", @@ -127,7 +132,8 @@ PreservedAnalyses InstSimplifierPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); - bool Changed = runImpl(F, &DT, &TLI); + auto &AC = AM.getResult(F); + bool Changed = runImpl(F, &DT, &TLI, &AC); if (!Changed) return PreservedAnalyses::all(); // FIXME: This should also 'preserve the CFG'. 
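The instsimplify pass feeds the same SimplifyInstruction entry point, and its worklist structure is worth spelling out: on the first sweep every instruction is a candidate, and afterwards only the users of instructions that actually folded are re-queued, so the pass converges without rescanning the whole function. A compact sketch with names from the hunk (dead-instruction cleanup omitted):

  static bool runImpl(Function &F, const DominatorTree *DT,
                      const TargetLibraryInfo *TLI, AssumptionCache *AC) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
    bool Changed = false;
    do {
      for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
        for (Instruction &I : *BB) {
          if (!ToSimplify->empty() && !ToSimplify->count(&I))
            continue;                       // untouched last round
          if (I.use_empty())
            continue;                       // nothing to gain
          if (Value *V = SimplifyInstruction(&I, DL, TLI, DT, AC)) {
            // Queue the users so they are resimplified on the next round.
            for (User *U : I.users())
              Next->insert(cast<Instruction>(U));
            I.replaceAllUsesWith(V);
            Changed = true;
          }
        }
      std::swap(ToSimplify, Next);
      Next->clear();
    } while (!ToSimplify->empty());
    return Changed;
  }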
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 1c0a48bd9c6..c8f030f7eb8 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -461,7 +461,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, CI, nullptr); + computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, + nullptr); KnownZero.flipAllBits(); size_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index ca55ceaccbc..c44a393cf84 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -330,7 +330,7 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { if (!Safe) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, OpA, &DT); + computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT); KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1); if (KnownZero != 0) Safe = true; @@ -819,7 +819,7 @@ bool Vectorizer::vectorizeStoreChain( unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(), StackAdjustedAlignment, - DL, S0, &DT); + DL, S0, nullptr, &DT); if (NewAlign < StackAdjustedAlignment) return false; } @@ -960,7 +960,7 @@ bool Vectorizer::vectorizeLoadChain( unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(), StackAdjustedAlignment, - DL, L0, &DT); + DL, L0, nullptr, &DT); if (NewAlign < StackAdjustedAlignment) return false; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index afcb50b2795..70269ed61f5 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -369,12 +369,12 @@ public: InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - ORE(ORE), VF(VecWidth), UF(UnrollFactor), + AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth), TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM), @@ -706,6 +706,8 @@ protected: const TargetLibraryInfo *TLI; /// Target Transform Info. const TargetTransformInfo *TTI; + /// Assumption Cache. + AssumptionCache *AC; /// Interface to emit optimization remarks. 
OptimizationRemarkEmitter *ORE; @@ -788,11 +790,11 @@ public: InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) - : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, ORE, 1, + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, UnrollFactor, LVL, CM) {} private: @@ -1848,10 +1850,11 @@ public: LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints) : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - ORE(ORE), TheFunction(F), Hints(Hints) {} + AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {} /// Information about vectorization costs struct VectorizationFactor { @@ -2004,6 +2007,8 @@ public: const TargetLibraryInfo *TLI; /// Demanded bits analysis. DemandedBits *DB; + /// Assumption cache. + AssumptionCache *AC; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; @@ -2117,6 +2122,7 @@ struct LoopVectorize : public FunctionPass { auto *TLIP = getAnalysisIfAvailable(); auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; auto *AA = &getAnalysis().getAAResults(); + auto *AC = &getAnalysis().getAssumptionCache(F); auto *LAA = &getAnalysis(); auto *DB = &getAnalysis().getDemandedBits(); auto *ORE = &getAnalysis().getORE(); @@ -2124,11 +2130,12 @@ struct LoopVectorize : public FunctionPass { std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; - return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, GetLAA, - *ORE); + return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, + GetLAA, *ORE); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addRequired(); @@ -3056,6 +3063,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Add the cloned scalar to the scalar map entry. Entry[Part][Lane] = Cloned; + // If we just cloned a new assumption, add it the assumption cache. + if (auto *II = dyn_cast(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); @@ -7168,6 +7180,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) @@ -7196,7 +7209,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. - CodeMetrics::collectEphemeralValues(TheLoop, ValuesToIgnore); + CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); // Ignore type-promoting instructions we identified during reduction // detection. 
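On the cost-model side the cache is what makes ephemeral values visible again: values that exist only to feed @llvm.assume must not count toward the vectorization cost, and collectEphemeralValues needs the AssumptionCache to enumerate the assumes it traces back from. With the member names from the hunks, the restored use is essentially:

  void LoopVectorizationCostModel::collectValuesToIgnore() {
    // Operands that only feed assumes are free; find them via the cache
    // instead of via "affected" operand bundles.
    CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
    // ... type-promoting instructions from reduction detection follow ...
  }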
@@ -7270,6 +7283,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Add the cloned scalar to the scalar map entry. Entry[Part][0] = Cloned; + // If we just cloned a new assumption, add it the assumption cache. + if (auto *II = dyn_cast(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); @@ -7409,7 +7427,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, ORE, F, + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints); CM.collectValuesToIgnore(); @@ -7545,7 +7563,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, ORE, IC, &LVL, &CM); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, + &CM); Unroller.vectorize(); ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -7554,8 +7573,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { << NV("InterleaveCount", IC) << ")"); } else { // If we decided that it is *legal* to vectorize the loop, then do it. - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, ORE, VF.Width, IC, &LVL, - &CM); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, + &LVL, &CM); LB.vectorize(); ++LoopsVectorized; @@ -7583,7 +7602,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool LoopVectorizePass::runImpl( Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, - DemandedBits &DB_, AliasAnalysis &AA_, + DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, std::function &GetLAA_, OptimizationRemarkEmitter &ORE_) { @@ -7594,6 +7613,7 @@ bool LoopVectorizePass::runImpl( BFI = &BFI_; TLI = TLI_; AA = &AA_; + AC = &AC_; GetLAA = &GetLAA_; DB = &DB_; ORE = &ORE_; @@ -7643,6 +7663,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, auto &BFI = AM.getResult(F); auto *TLI = AM.getCachedResult(F); auto &AA = AM.getResult(F); + auto &AC = AM.getResult(F); auto &DB = AM.getResult(F); auto &ORE = AM.getResult(F); @@ -7652,7 +7673,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, return LAM.getResult(L); }; bool Changed = - runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, GetLAA, ORE); + runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 453ecddaaa4..bcaa8439cff 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -308,11 +307,12 @@ public: BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, - DominatorTree *Dt, DemandedBits *DB, const DataLayout *DL) + DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, + const DataLayout *DL) : NumLoadsWantToKeepOrder(0), 
NumLoadsWantToChangeOrder(0), F(Func), - SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), DB(DB), + SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), DL(DL), Builder(Se->getContext()) { - CodeMetrics::collectEphemeralValues(F, EphValues); + CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. // TODO: It would be better to limit the vectorization factor based on @@ -901,6 +901,7 @@ private: AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; + AssumptionCache *AC; DemandedBits *DB; const DataLayout *DL; unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. @@ -3539,7 +3540,7 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine the maximum number of bits required to store the scalar // values. for (auto *Scalar : ToDemote) { - auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, 0, DT); + auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT); auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); MaxBitWidth = std::max(NumTypeBits - NumSignBits, MaxBitWidth); } @@ -3611,13 +3612,15 @@ struct SLPVectorizer : public FunctionPass { auto *AA = &getAnalysis().getAAResults(); auto *LI = &getAnalysis().getLoopInfo(); auto *DT = &getAnalysis().getDomTree(); + auto *AC = &getAnalysis().getAssumptionCache(F); auto *DB = &getAnalysis().getDemandedBits(); - return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, DB); + return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); } void getAnalysisUsage(AnalysisUsage &AU) const override { FunctionPass::getAnalysisUsage(AU); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -3640,9 +3643,10 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A auto *AA = &AM.getResult(F); auto *LI = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *AC = &AM.getResult(F); auto *DB = &AM.getResult(F); - bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, DB); + bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -3657,13 +3661,14 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, DominatorTree *DT_, - DemandedBits *DB_) { + AssumptionCache *AC_, DemandedBits *DB_) { SE = SE_; TTI = TTI_; TLI = TLI_; AA = AA_; LI = LI_; DT = DT_; + AC = AC_; DB = DB_; DL = &F.getParent()->getDataLayout(); @@ -3684,7 +3689,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, DB, DL); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. 
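SLP vectorization leans on the cache in the same two ways: BoUpSLP gathers the function's ephemeral values once at construction, and computeMinimumValueSizes asks ComputeNumSignBits, with the cache as context, how many bits each scalar really needs before narrowing the vector element type. A sketch of the latter, mirroring the pre-4.0 signature used in this patch:

  // MaxBitWidth is the widest storage any scalar in the chain still needs;
  // sign bits proven via dominating assumes let it shrink.
  for (auto *Scalar : ToDemote) {
    auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, /*CxtI=*/nullptr, DT);
    auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
    MaxBitWidth = std::max(NumTypeBits - NumSignBits, MaxBitWidth);
  }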
@@ -4933,6 +4938,7 @@ static const char lv_name[] = "SLP Vectorizer"; INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) diff --git a/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll b/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll index 98739c32925..bc01f22b3f3 100644 --- a/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll +++ b/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll @@ -55,7 +55,7 @@ loop: %cmp = icmp slt i32 %iv, 10000 ; CHECK: %iv.sext = sext i32 %iv to i64 ; CHECK-NEXT: --> {0,+,3}<%loop> - call void @llvm.assume(i1 %cmp) [ "affected"(i32 %iv) ] + call void @llvm.assume(i1 %cmp) %c = load volatile i1, i1* %cond br i1 %c, label %loop, label %leave @@ -159,7 +159,7 @@ loop: %cmp = icmp ugt i32 %iv.inc, -10000 ; CHECK: %iv.zext = zext i32 %iv to i64 ; CHECK-NEXT: --> {30000,+,-2}<%loop> - call void @llvm.assume(i1 %cmp) [ "affected"(i32 %iv.inc) ] + call void @llvm.assume(i1 %cmp) %c = load volatile i1, i1* %cond br i1 %c, label %loop, label %leave diff --git a/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/test/Analysis/ScalarEvolution/nsw-offset-assume.ll index 2e3951a60dd..5413b477df5 100644 --- a/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ b/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -11,7 +11,7 @@ define void @foo(i32 %no, double* nocapture %d, double* nocapture %q) nounwind { entry: %n = and i32 %no, 4294967294 %0 = icmp sgt i32 %n, 0 ; [#uses=1] - tail call void @llvm.assume(i1 %0) [ "affected"(i32 %n) ] + tail call void @llvm.assume(i1 %0) br label %bb.nph bb.nph: ; preds = %entry diff --git a/test/Transforms/CorrelatedValuePropagation/conflict.ll b/test/Transforms/CorrelatedValuePropagation/conflict.ll index 35623ee9c12..ef566856ed7 100644 --- a/test/Transforms/CorrelatedValuePropagation/conflict.ll +++ b/test/Transforms/CorrelatedValuePropagation/conflict.ll @@ -26,7 +26,7 @@ declare void @llvm.assume(i1) define i8 @test2(i8 %a) { ; CHECK-LABEL: @test2 %cmp1 = icmp eq i8 %a, 5 - call void @llvm.assume(i1 %cmp1) [ "affected"(i8 %a) ] + call void @llvm.assume(i1 %cmp1) %cmp2 = icmp eq i8 %a, 3 ; CHECK: br i1 false, label %dead, label %exit br i1 %cmp2, label %dead, label %exit @@ -43,7 +43,7 @@ define i8 @test3(i8 %a) { dead: %cmp2 = icmp eq i8 %a, 3 ; CHECK: call void @llvm.assume(i1 false) - call void @llvm.assume(i1 %cmp2) [ "affected"(i8 %a) ] + call void @llvm.assume(i1 %cmp2) ret i8 %a exit: ret i8 0 diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll index 4e18d0ab90c..4bdbcc8d086 100644 --- a/test/Transforms/InstCombine/assume-redundant.ll +++ b/test/Transforms/InstCombine/assume-redundant.ll @@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 { ; CHECK-LABEL: @_Z3fooR1s -; CHECK: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, double* %{{.*}}) ] +; CHECK: call void @llvm.assume ; CHECK-NOT: call void @llvm.assume entry: diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll index da935925197..2f9213820f2 100644 --- a/test/Transforms/InstCombine/assume.ll +++ 
b/test/Transforms/InstCombine/assume.ll @@ -11,7 +11,7 @@ entry: ; been removed: ; CHECK-LABEL: @foo1 ; CHECK-DAG: load i32, i32* %a, align 32 -; CHECK-DAG: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, i32* %a) ] +; CHECK-DAG: call void @llvm.assume ; CHECK: ret i32 %ptrint = ptrtoint i32* %a to i64 @@ -28,7 +28,7 @@ entry: ; Same check as in @foo1, but make sure it works if the assume is first too. ; CHECK-LABEL: @foo2 ; CHECK-DAG: load i32, i32* %a, align 32 -; CHECK-DAG: call void @llvm.assume(i1 %maskcond) [ "affected"(i64 %maskedptr, i64 %ptrint, i32* %a) ] +; CHECK-DAG: call void @llvm.assume ; CHECK: ret i32 %ptrint = ptrtoint i32* %a to i64 @@ -51,7 +51,7 @@ entry: ; CHECK: ret i32 4 %cmp = icmp eq i32 %a, 4 - tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ] + tail call void @llvm.assume(i1 %cmp) ret i32 %a } @@ -93,7 +93,7 @@ entry: %and1 = and i32 %a, 3 ; CHECK-LABEL: @bar1 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 1 %and = and i32 %a, 7 @@ -107,7 +107,7 @@ entry: define i32 @bar2(i32 %a) #0 { entry: ; CHECK-LABEL: @bar2 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 1 %and = and i32 %a, 7 @@ -125,7 +125,7 @@ entry: ; Don't be fooled by other assumes around. ; CHECK-LABEL: @bar3 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 1 tail call void @llvm.assume(i1 %x) @@ -145,8 +145,8 @@ entry: %and1 = and i32 %b, 3 ; CHECK-LABEL: @bar4 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ] -; CHECK: call void @llvm.assume(i1 %cmp2) [ "affected"(i32 %a, i32 %b) ] +; CHECK: call void @llvm.assume +; CHECK: call void @llvm.assume ; CHECK: ret i32 1 %and = and i32 %a, 7 @@ -167,7 +167,7 @@ entry: ret i32 %conv ; CHECK-LABEL: @icmp1 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 1 } @@ -182,7 +182,7 @@ entry: ret i32 %lnot.ext ; CHECK-LABEL: @icmp2 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 0 } @@ -217,7 +217,7 @@ entry: ; CHECK-LABEL: @nonnull2 ; CHECK-NOT: !nonnull -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %load) ] +; CHECK: call void @llvm.assume } ; Make sure the above canonicalization does not trigger @@ -236,7 +236,7 @@ not_taken: ; CHECK-LABEL: @nonnull3 ; CHECK-NOT: !nonnull -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32* %load) ] +; CHECK: call void @llvm.assume } ; Make sure the above canonicalization does not trigger @@ -254,7 +254,7 @@ entry: ; CHECK-LABEL: @nonnull4 ; CHECK-NOT: !nonnull -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32* %load) ] +; CHECK: call void @llvm.assume } diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll index 65bc2f962af..c41bbaa04eb 100644 --- a/test/Transforms/InstCombine/assume2.ll +++ b/test/Transforms/InstCombine/assume2.ll @@ -9,7 +9,7 @@ declare void @llvm.assume(i1) #1 define i32 @test1(i32 %a) #0 { entry: ; CHECK-LABEL: @test1 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %and, i32 %a) ] +; CHECK: call void @llvm.assume ; CHECK: ret i32 5 %and = and i32 %a, 15 @@ -24,7 +24,7 @@ entry: define i32 @test2(i32 %a) #0 { entry: ; CHECK-LABEL: @test2 -; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a.not, i32 
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 2
 
   %and = and i32 %a, 15
@@ -40,7 +40,7 @@ entry:
 define i32 @test3(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test3
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v, i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 5
 
   %v = or i32 %a, 4294967280
@@ -55,7 +55,7 @@ entry:
 define i32 @test4(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test4
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a.not, i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 2
 
   %v = or i32 %a, 4294967280
@@ -71,7 +71,7 @@ entry:
 define i32 @test5(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test5
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 4
 
   %v = xor i32 %a, 1
@@ -86,7 +86,7 @@ entry:
 define i32 @test6(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test6
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 5
 
   %v = shl i32 %a, 2
@@ -101,7 +101,7 @@ entry:
 define i32 @test7(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test7
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 20
 
   %v = lshr i32 %a, 2
@@ -116,7 +116,7 @@ entry:
 define i32 @test8(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test8
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %v.mask, i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 20
 
   %v = lshr i32 %a, 2
@@ -131,7 +131,7 @@ entry:
 define i32 @test9(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test9
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 0
 
   %cmp = icmp sgt i32 %a, 5
@@ -145,7 +145,7 @@ entry:
 define i32 @test10(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test10
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 -2147483648
 
   %cmp = icmp sle i32 %a, -2
@@ -159,7 +159,7 @@ entry:
 define i32 @test11(i32 %a) #0 {
 entry:
 ; CHECK-LABEL: @test11
-; CHECK: call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+; CHECK: call void @llvm.assume
 ; CHECK: ret i32 0
 
   %cmp = icmp ule i32 %a, 256
diff --git a/test/Transforms/InstSimplify/add-mask.ll b/test/Transforms/InstSimplify/add-mask.ll
index ac2447ac3a6..e30a35f5312 100644
--- a/test/Transforms/InstSimplify/add-mask.ll
+++ b/test/Transforms/InstSimplify/add-mask.ll
@@ -46,7 +46,7 @@ define i1 @test4(i32 %a) {
   %b = load i32, i32* @B
   %b.and = and i32 %b, 1
   %b.cnd = icmp eq i32 %b.and, 1
-  call void @llvm.assume(i1 %b.cnd) [ "affected"(i32 %b.and, i32 %b) ]
+  call void @llvm.assume(i1 %b.cnd)
 
   %rhs = add i32 %a, %b
   %and = and i32 %a, %rhs
diff --git a/test/Transforms/JumpThreading/assume-edge-dom.ll b/test/Transforms/JumpThreading/assume-edge-dom.ll
index ff087693ea8..f1d0f41e250 100644
--- a/test/Transforms/JumpThreading/assume-edge-dom.ll
+++ b/test/Transforms/JumpThreading/assume-edge-dom.ll
@@ -14,12 +14,12 @@ entry:
 taken:
     %res1 = call i8* @escape()
     %a = icmp eq i8* %res1, null
-    tail call void @llvm.assume(i1 %a) [ "affected"(i8* %res1) ]
+    tail call void @llvm.assume(i1 %a)
     br label %done
 not_taken:
     %res2 = call i8* @escape()
     %b = icmp ne i8* %res2, null
-    tail call void @llvm.assume(i1 %b) [ "affected"(i8* %res2) ]
+    tail call void @llvm.assume(i1 %b)
     br label %done
 
 ; An assume that can be used to simplify this comparison dominates each
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
index fdaa07ff131..53010b71c72 100644
--- a/test/Transforms/JumpThreading/assume.ll
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define i32 @test1(i32 %a, i32 %b) #0 {
 entry:
   %cmp = icmp sgt i32 %a, 5
-  tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+  tail call void @llvm.assume(i1 %cmp)
   %cmp1 = icmp sgt i32 %b, 1234
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -36,7 +36,7 @@ return:                                           ; preds = %if.else, %if.then,
 define i32 @test2(i32 %a) #0 {
 entry:
   %cmp = icmp sgt i32 %a, 5
-  tail call void @llvm.assume(i1 %cmp) [ "affected"(i32 %a) ]
+  tail call void @llvm.assume(i1 %cmp)
   %cmp1 = icmp sgt i32 %a, 3
   br i1 %cmp1, label %if.then, label %return
 
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index 84d2551469a..299c18c871e 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -S -loop-rotate < %s | FileCheck %s
-; RUN: opt -S -passes='require<targetir>,loop(rotate)' < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
index 827f6445e38..a08e07e9644 100644
--- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -75,10 +75,10 @@ define void @reassociate_gep_assume(float* %a, i32 %i, i32 %j) {
 ; CHECK-LABEL: @reassociate_gep_assume(
   ; assume(j >= 0)
   %cmp = icmp sgt i32 %j, -1
-  call void @llvm.assume(i1 %cmp) [ "affected"(i32 %j) ]
+  call void @llvm.assume(i1 %cmp)
   %1 = add i32 %i, %j
   %cmp2 = icmp sgt i32 %1, -1
-  call void @llvm.assume(i1 %cmp2) [ "affected"(i32 %1) ]
+  call void @llvm.assume(i1 %cmp2)
 
   %idxprom.j = zext i32 %j to i64
   %2 = getelementptr float, float* %a, i64 %idxprom.j
diff --git a/test/Transforms/SimplifyCFG/switch-dead-default.ll b/test/Transforms/SimplifyCFG/switch-dead-default.ll
index c77b3e2335e..e5c2ef65b31 100644
--- a/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -91,7 +91,7 @@ define void @test5(i8 %a) {
 ; CHECK-LABEL: @test5
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %cmp = icmp ult i8 %a, 2
-  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %a) ]
+  call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 1, label %true
                                 i8 0, label %false]
 true:
@@ -112,7 +112,7 @@ define void @test6(i8 %a) {
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, 254
-  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
+  call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false]
 true:
@@ -134,7 +134,7 @@ define void @test7(i8 %a) {
 ; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, 254
-  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
+  call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false
                                 i8 0, label %also_dead]
@@ -162,7 +162,7 @@ define void @test8(i8 %a) {
 ; CHECK: switch i8
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, undef
-  call void @llvm.assume(i1 %cmp) [ "affected"(i8 %and, i8 %a) ]
+  call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
                                 i8 254, label %false]
 true:
diff --git a/unittests/Analysis/AliasAnalysisTest.cpp b/unittests/Analysis/AliasAnalysisTest.cpp
index 7305abf7afe..84a04257bc2 100644
--- a/unittests/Analysis/AliasAnalysisTest.cpp
+++ b/unittests/Analysis/AliasAnalysisTest.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
@@ -143,6 +144,7 @@ protected:
   Module M;
   TargetLibraryInfoImpl TLII;
   TargetLibraryInfo TLI;
+  std::unique_ptr<AssumptionCache> AC;
  std::unique_ptr<BasicAAResult> BAR;
  std::unique_ptr<AAResults> AAR;
 
@@ -153,7 +155,8 @@ protected:
     AAR.reset(new AAResults(TLI));
 
     // Build the various AA results and register them.
-    BAR.reset(new BasicAAResult(M.getDataLayout(), TLI));
+    AC.reset(new AssumptionCache(F));
+    BAR.reset(new BasicAAResult(M.getDataLayout(), TLI, *AC));
     AAR->addAAResult(*BAR);
 
     return *AAR;
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index eded23e8430..752cc812824 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallVector.h"
@@ -37,15 +38,17 @@ protected:
 
   TargetLibraryInfoImpl TLII;
   TargetLibraryInfo TLI;
+  std::unique_ptr<AssumptionCache> AC;
   std::unique_ptr<DominatorTree> DT;
   std::unique_ptr<LoopInfo> LI;
 
   ScalarEvolutionsTest() : M("", Context), TLII(), TLI(TLII) {}
 
   ScalarEvolution buildSE(Function &F) {
+    AC.reset(new AssumptionCache(F));
     DT.reset(new DominatorTree(F));
     LI.reset(new LoopInfo(*DT));
-    return ScalarEvolution(F, TLI, *DT, *LI);
+    return ScalarEvolution(F, TLI, *AC, *DT, *LI);
   }
 
   void runWithFunctionAndSE(
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h b/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h
index f0de2d023a1..a3d3d1fffdb 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h
@@ -42,6 +42,10 @@ protected:
     InitializeNativeTarget();
     InitializeNativeTargetAsmPrinter();
 
+    // FIXME: It isn't at all clear why this is necessary, but without it we
+    // fail to initialize the AssumptionCacheTracker.
+    initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
+
 #ifdef LLVM_ON_WIN32
     // On Windows, generate ELF objects by specifying "-elf" in triple
     HostTriple += "-elf";
diff --git a/unittests/Transforms/Utils/MemorySSA.cpp b/unittests/Transforms/Utils/MemorySSA.cpp
index 72bf25e1cc6..c290e5f4073 100644
--- a/unittests/Transforms/Utils/MemorySSA.cpp
+++ b/unittests/Transforms/Utils/MemorySSA.cpp
@@ -39,6 +39,7 @@ protected:
   // Things that we need to build after the function is created.
   struct TestAnalyses {
     DominatorTree DT;
+    AssumptionCache AC;
     AAResults AA;
     BasicAAResult BAA;
     // We need to defer MSSA construction until AA is *entirely* set up, which
@@ -47,8 +48,8 @@ protected:
     MemorySSAWalker *Walker;
 
     TestAnalyses(MemorySSATest &Test)
-        : DT(*Test.F), AA(Test.TLI),
-          BAA(Test.DL, Test.TLI, &DT) {
+        : DT(*Test.F), AC(*Test.F), AA(Test.TLI),
+          BAA(Test.DL, Test.TLI, AC, &DT) {
       AA.addAAResult(BAA);
       MSSA = make_unique<MemorySSA>(*Test.F, &AA, &DT);
       Walker = MSSA->getWalker();
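
Not part of the patch: a minimal, self-contained sketch of the per-function analysis setup that the unit-test hunks above restore, for readers porting out-of-tree code across this revert. The PerFunctionAnalyses helper name is invented for illustration; the constructor signatures (AssumptionCache(F), DominatorTree(F), LoopInfo(DT), ScalarEvolution(F, TLI, AC, DT, LI)) are the ones exercised by ScalarEvolutionTest.cpp in the diff.

// Sketch only -- mirrors ScalarEvolutionsTest::buildSE() as it looks after the revert.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Hypothetical helper bundling the analyses a client now builds by hand:
// the AssumptionCache is again a constructor argument of ScalarEvolution
// rather than something recovered from "affected" operand bundles.
struct PerFunctionAnalyses {
  TargetLibraryInfoImpl TLII;
  TargetLibraryInfo TLI;
  AssumptionCache AC;
  DominatorTree DT;
  LoopInfo LI;
  ScalarEvolution SE;

  explicit PerFunctionAnalyses(Function &F)
      : TLI(TLII), AC(F), DT(F), LI(DT), SE(F, TLI, AC, DT, LI) {}
};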