From 5c273e628c5f940bd5bbd0bcc5e8177eeb279f37 Mon Sep 17 00:00:00 2001 From: Sidharth Baveja Date: Wed, 8 Jul 2020 18:59:01 +0000 Subject: [PATCH] [NFC] Separate Peeling Properties into its own struct Summary: This patch makes the peeling properties of the loop accessible by other loop transformations. Author: sidbav (Sidharth Baveja) Reviewers: Whitney (Whitney Tsang), Meinersbur (Michael Kruse), skatkov (Serguei Katkov), ashlykov (Arkady Shlykov), bogner (Justin Bogner), hfinkel (Hal Finkel) Reviewed By: Meinersbur (Michael Kruse) Subscribers: fhahn (Florian Hahn), hiraditya (Aditya Kumar), llvm-commits, LLVM Tag: LLVM Differential Revision: https://reviews.llvm.org/D80580 --- include/llvm/Analysis/TargetTransformInfo.h | 42 ++++++++----- .../llvm/Analysis/TargetTransformInfoImpl.h | 3 + include/llvm/CodeGen/BasicTTIImpl.h | 8 +++ include/llvm/Transforms/Utils/UnrollLoop.h | 13 +++- lib/Analysis/TargetTransformInfo.cpp | 5 ++ .../AArch64/AArch64TargetTransformInfo.cpp | 5 ++ .../AArch64/AArch64TargetTransformInfo.h | 3 + .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 14 +++++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 8 +++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 5 ++ lib/Target/ARM/ARMTargetTransformInfo.h | 2 + .../Hexagon/HexagonTargetTransformInfo.cpp | 7 ++- .../Hexagon/HexagonTargetTransformInfo.h | 3 + lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 5 ++ lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 4 ++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++ lib/Target/PowerPC/PPCTargetTransformInfo.h | 2 + .../SystemZ/SystemZTargetTransformInfo.cpp | 4 ++ .../SystemZ/SystemZTargetTransformInfo.h | 3 + .../Scalar/LoopUnrollAndJamPass.cpp | 12 ++-- lib/Transforms/Scalar/LoopUnrollPass.cpp | 61 +++++++++++-------- lib/Transforms/Utils/LoopUnrollPeel.cpp | 25 ++++---- 22 files changed, 180 insertions(+), 58 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 695b7d6061c..b6698eefdb0 
100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -450,11 +450,6 @@ public: /// transformation will select an unrolling factor based on the current cost /// threshold and other factors. unsigned Count; - /// A forced peeling factor (the number of bodied of the original loop - /// that should be peeled off before the loop body). When set to 0, the - /// unrolling transformation will select a peeling factor based on profile - /// information and other factors. - unsigned PeelCount; /// Default unroll count for loops with run-time trip count. unsigned DefaultUnrollRuntimeCount; // Set the maximum unrolling factor. The unrolling factor may be selected @@ -488,19 +483,10 @@ public: bool Force; /// Allow using trip count upper bound to unroll loops. bool UpperBound; - /// Allow peeling off loop iterations. - bool AllowPeeling; - /// Allow peeling off loop iterations for loop nests. - bool AllowLoopNestsPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. bool UnrollAndJam; - /// Allow peeling basing on profile. Uses to enable peeling off all - /// iterations basing on provided profile. - /// If the value is true the peeling cost model can decide to peel only - /// some iterations and in this case it will set this to false. - bool PeelProfiledIterations; /// Threshold for unroll and jam, for inner loop size. The 'Threshold' /// value above is used during unroll and jam for the outer loop size. /// This value is used in the same manner to limit the size of the inner @@ -534,6 +520,28 @@ public: /// intrinsic is supported. bool emitGetActiveLaneMask() const; + // Parameters that control the loop peeling transformation + struct PeelingPreferences { + /// A forced peeling factor (the number of bodies of the original loop + /// that should be peeled off before the loop body). 
When set to 0, the + /// peel count is chosen based on profile information and other factors. + unsigned PeelCount = 0; + /// Allow peeling off loop iterations. + bool AllowPeeling = true; + /// Allow peeling off loop iterations for loop nests. + bool AllowLoopNestsPeeling = false; + /// Allow peeling based on profile. Used to enable peeling off all + /// iterations based on the provided profile. + /// If the value is true the peeling cost model can decide to peel only + /// some iterations and in this case it will set this to false. + bool PeelProfiledIterations = true; + }; + + /// Get target-customized preferences for the generic loop peeling + /// transformation. The caller will initialize \p PP with the current + /// target-independent defaults with information from \p L and \p SE. + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + PeelingPreferences &PP) const; /// @} /// \name Scalar Target Information @@ -1282,6 +1290,8 @@ public: virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) = 0; + virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + PeelingPreferences &PP) = 0; virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, @@ -1560,6 +1570,10 @@ public: UnrollingPreferences &UP) override { return Impl.getUnrollingPreferences(L, SE, UP); } + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + PeelingPreferences &PP) override { + return Impl.getPeelingPreferences(L, SE, PP); + } bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) override { diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index ca7106ab98a..0ce975d6d4b 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -150,6 +150,9 @@ public: 
void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &) {} + void getPeelingPreferences(Loop *, ScalarEvolution &, + TTI::PeelingPreferences &) {} + bool isLegalAddImmediate(int64_t Imm) { return false; } bool isLegalICmpImmediate(int64_t Imm) { return false; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index c6a9a65ae6c..f9d32eadd23 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -451,6 +451,14 @@ public: UP.BEInsns = 2; } + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + } + bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 1970cefcefb..bb3d02b9595 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -94,6 +94,7 @@ bool UnrollRuntimeLoopRemainder( void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE); bool canPeel(Loop *L); @@ -119,6 +120,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, + bool &UseUpperBound); void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, @@ -133,9 +136,13 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, 
Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling, - Optional UserFullUnrollMaxCount); + Optional UserUpperBound, Optional UserFullUnrollMaxCount); + +TargetTransformInfo::PeelingPreferences +gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling); unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 87c6f83938e..2f051e53790 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -327,6 +327,11 @@ void TargetTransformInfo::getUnrollingPreferences( return TTIImpl->getUnrollingPreferences(L, SE, UP); } +void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + PeelingPreferences &PP) const { + return TTIImpl->getPeelingPreferences(L, SE, PP); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index be0c51b83a2..cf6de797727 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -859,6 +859,11 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, getFalkorUnrollingPreferences(L, SE, UP); } +void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} + Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) { switch (Inst->getIntrinsicID()) { diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 27afb2e5a7d..094b04c95db 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ 
b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -153,6 +153,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType); diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 24f079ffe92..46051ac14b5 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -236,6 +236,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } } +void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { // The concept of vector registers doesn't really exist. Some packed vector // operations operate on the normal 32-bit registers. @@ -990,6 +994,11 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getUnrollingPreferences(L, SE, UP); } +void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + CommonTTI.getPeelingPreferences(L, SE, PP); +} + unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 
} @@ -1096,3 +1105,8 @@ void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { CommonTTI.getUnrollingPreferences(L, SE, UP); } + +void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + CommonTTI.getPeelingPreferences(L, SE, PP); +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 508ed061e93..b913f5194e4 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -61,6 +61,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); }; class GCNTTIImpl final : public BasicTTIImplBase { @@ -141,6 +144,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TTI::PSK_FastHardware; @@ -258,6 +264,8 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); unsigned getHardwareNumberOfRegisters(bool Vec) const; unsigned getNumberOfRegisters(bool Vec) const; unsigned getRegisterBitWidth(bool Vector) const; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 44dfb9e8c12..74b1331216a 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1582,6 +1582,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Force = true; } +void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + 
TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} + bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { return ST->hasMVEIntegerOps(); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 5d914227c96..537a546361e 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -251,6 +251,8 @@ public: bool emitGetActiveLaneMask() const; + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); bool shouldBuildLookupTablesForConstant(Constant *C) const { // In the ROPI and RWPI relocation models we can't have pointers to global // variables or functions in constant data, so don't convert switches to diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 76df4e8e193..80c8736cb74 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -78,12 +78,17 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Runtime = UP.Partial = true; +} + +void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); // Only try to peel innermost loops with small runtime trip counts. 
if (L && L->empty() && canPeel(L) && SE.getSmallConstantTripCount(L) == 0 && SE.getSmallConstantMaxTripCount(L) > 0 && SE.getSmallConstantMaxTripCount(L) <= 5) { - UP.PeelCount = 2; + PP.PeelCount = 2; } } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 3365c5bf1cb..5fe39748640 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -64,6 +64,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + /// Bias LSR towards creating post-increment opportunities. bool shouldFavorPostInc() const; diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 5c14d0f1a24..3873c73fb2e 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -155,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Partial = UP.Runtime = true; UP.PartialThreshold = UP.Threshold / 4; } + +void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 88156f68728..cb832031f1a 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -95,6 +95,10 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { // Volatile loads/stores are only supported for shared and global address // spaces, or for generic AS that maps to them. 
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f2c746a1429..53556ffc267 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -568,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getUnrollingPreferences(L, SE, UP); } +void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} // This function returns true to allow using coldcc calling convention. // Returning true results in coldcc being used for functions which are cold at // all call sites when the callers of the functions are not calling any other diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index b831789d3e6..d998521084e 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -66,6 +66,8 @@ public: TargetLibraryInfo *LibInfo); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 36141426e27..864200e5f71 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -294,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Force = true; } +void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) { diff --git 
a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index d20541774da..7f8f7f6f923 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -50,6 +50,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); /// @} diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index f0ece1faa5f..285cba6ee20 100644 --- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -158,7 +158,8 @@ static bool computeUnrollAndJamCount( const SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, - unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) { + unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP) { // First up use computeUnrollCount from the loop unroller to get a count // for unrolling the outer loop, plus any loops requiring explicit // unrolling we leave to the unroller. This uses UP.Threshold / @@ -168,7 +169,8 @@ static bool computeUnrollAndJamCount( bool UseUpperBound = false; bool ExplicitUnroll = computeUnrollCount( L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, - /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); + /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP, + UseUpperBound); if (ExplicitUnroll || UseUpperBound) { // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it // for the unroller instead. 
@@ -282,7 +284,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, OptimizationRemarkEmitter &ORE, int OptLevel) { TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, - None, None, None, None, None, None, None); + None, None, None, None, None); + TargetTransformInfo::PeelingPreferences PP = + gatherPeelingPreferences(L, SE, TTI, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) @@ -367,7 +371,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // Decide if, and by how much, to unroll bool IsCountSetExplicitly = computeUnrollAndJamCount( L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount, - OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP); + OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP); if (UP.Count <= 1) return LoopUnrollResult::Unmodified; // Unroll factor (Count) must be less or equal to TripCount. 
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ec56610e41e..88845cde8d4 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -193,9 +193,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling, - Optional UserFullUnrollMaxCount) { + Optional UserUpperBound, Optional UserFullUnrollMaxCount) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -206,7 +204,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.PartialThreshold = 150; UP.PartialOptSizeThreshold = 0; UP.Count = 0; - UP.PeelCount = 0; UP.DefaultUnrollRuntimeCount = 8; UP.MaxCount = std::numeric_limits::max(); UP.FullUnrollMaxCount = std::numeric_limits::max(); @@ -218,10 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.AllowExpensiveTripCount = false; UP.Force = false; UP.UpperBound = false; - UP.AllowPeeling = true; - UP.AllowLoopNestsPeeling = false; UP.UnrollAndJam = false; - UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; @@ -249,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.MaxCount = UnrollMaxCount; if (UnrollFullMaxCount.getNumOccurrences() > 0) UP.FullUnrollMaxCount = UnrollFullMaxCount; - if (UnrollPeelCount.getNumOccurrences() > 0) - UP.PeelCount = UnrollPeelCount; if (UnrollAllowPartial.getNumOccurrences() > 0) UP.Partial = UnrollAllowPartial; if (UnrollAllowRemainder.getNumOccurrences() > 0) @@ -259,10 +251,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( 
UP.Runtime = UnrollRuntime; if (UnrollMaxUpperBound == 0) UP.UpperBound = false; - if (UnrollAllowPeeling.getNumOccurrences() > 0) - UP.AllowPeeling = UnrollAllowPeeling; - if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) - UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; if (UnrollUnrollRemainder.getNumOccurrences() > 0) UP.UnrollRemainder = UnrollUnrollRemainder; if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0) @@ -281,16 +269,39 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Runtime = *UserRuntime; if (UserUpperBound.hasValue()) UP.UpperBound = *UserUpperBound; - if (UserAllowPeeling.hasValue()) - UP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) - UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; if (UserFullUnrollMaxCount.hasValue()) UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; } +TargetTransformInfo::PeelingPreferences +llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling) { + TargetTransformInfo::PeelingPreferences PP; + + // Get Target Specific Values + TTI.getPeelingPreferences(L, SE, PP); + + // User Specified Values using cl::opt + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + + // User Specified values provided by argument + if (UserAllowPeeling.hasValue()) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + namespace { /// A struct to densely store the state of an instruction after unrolling at @@ -761,7 +772,8 @@ bool llvm::computeUnrollCount( ScalarEvolution &SE, const 
SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { + TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. @@ -863,8 +875,8 @@ bool llvm::computeUnrollCount( } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, UP, TripCount, SE); - if (UP.PeelCount) { + computePeelCount(L, LoopSize, UP, PP, TripCount, SE); + if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; return ExplicitUnroll; @@ -1067,8 +1079,9 @@ static LoopUnrollResult tryToUnrollLoop( TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); + TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( + L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1142,7 +1155,7 @@ static LoopUnrollResult tryToUnrollLoop( bool UseUpperBound = false; bool IsCountSetExplicitly = computeUnrollCount( L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, - TripMultiple, LoopSize, UP, UseUpperBound); + TripMultiple, LoopSize, UP, PP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; // Unroll factor (Count) must be less or equal to TripCount. 
@@ -1157,7 +1170,7 @@ static LoopUnrollResult tryToUnrollLoop( LoopUnrollResult UnrollResult = UnrollLoop( L, {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder, + UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) @@ -1189,7 +1202,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) + (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 43dfaf3e50d..c653aacbee6 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -279,19 +279,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); - // Save the UP.PeelCount value set by the target in - // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. - unsigned TargetPeelCount = UP.PeelCount; - UP.PeelCount = 0; + // Save the PP.PeelCount value set by the target in + // TTI.getPeelingPreferences or by the flag -unroll-peel-count. + unsigned TargetPeelCount = PP.PeelCount; + PP.PeelCount = 0; if (!canPeel(L)) return; // Only try to peel innermost loops by default. 
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences // or by the flag -unroll-allow-loop-nests-peeling. - if (!UP.AllowLoopNestsPeeling && !L->empty()) + if (!PP.AllowLoopNestsPeeling && !L->empty()) return; // If the user provided a peel count, use that. @@ -299,13 +300,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (UserPeelCount) { LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); - UP.PeelCount = UnrollForcePeelCount; - UP.PeelProfiledIterations = true; + PP.PeelCount = UnrollForcePeelCount; + PP.PeelProfiledIterations = true; return; } // Skip peeling if it's disabled. - if (!UP.AllowPeeling) + if (!PP.AllowPeeling) return; unsigned AlreadyPeeled = 0; @@ -354,8 +355,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - UP.PeelProfiledIterations = false; + PP.PeelCount = DesiredPeelCount; + PP.PeelProfiledIterations = false; return; } } @@ -367,7 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; // Do not apply profile base peeling if it is disabled. - if (!UP.PeelProfiledIterations) + if (!PP.PeelProfiledIterations) return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually @@ -387,7 +388,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); - UP.PeelCount = *PeelCount; + PP.PeelCount = *PeelCount; return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");