1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[NFC] Separate Peeling Properties into its own struct

Summary:
This patch makes the peeling properties of the loop accessible by other loop transformations.

Author: sidbav (Sidharth Baveja)

Reviewers: Whitney (Whitney Tsang), Meinersbur (Michael Kruse), skatkov (Serguei Katkov), ashlykov (Arkady Shlykov), bogner (Justin Bogner), hfinkel (Hal Finkel)

Reviewed By: Meinersbur (Michael Kruse)

Subscribers: fhahn (Florian Hahn), hiraditya (Aditya Kumar), llvm-commits, LLVM

Tag: LLVM

Differential Revision: https://reviews.llvm.org/D80580
This commit is contained in:
Sidharth Baveja 2020-07-08 18:59:01 +00:00 committed by Anh Tuyen Tran
parent 0cd52758f2
commit 5c273e628c
22 changed files with 180 additions and 58 deletions

View File

@ -450,11 +450,6 @@ public:
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
/// A forced peeling factor (the number of bodied of the original loop
/// that should be peeled off before the loop body). When set to 0, the
/// unrolling transformation will select a peeling factor based on profile
/// information and other factors.
unsigned PeelCount;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
@ -488,19 +483,10 @@ public:
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
/// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow peeling off loop iterations for loop nests.
bool AllowLoopNestsPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
/// Allow peeling basing on profile. Uses to enable peeling off all
/// iterations basing on provided profile.
/// If the value is true the peeling cost model can decide to peel only
/// some iterations and in this case it will set this to false.
bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
@ -534,6 +520,28 @@ public:
/// intrinsic is supported.
bool emitGetActiveLaneMask() const;
// Parameters that control the loop peeling transformation
struct PeelingPreferences {
/// A forced peeling factor (the number of bodied of the original loop
/// that should be peeled off before the loop body). When set to 0, the
/// a peeling factor based on profile information and other factors.
unsigned PeelCount;
/// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow peeling off loop iterations for loop nests.
bool AllowLoopNestsPeeling;
/// Allow peeling basing on profile. Uses to enable peeling off all
/// iterations basing on provided profile.
/// If the value is true the peeling cost model can decide to peel only
/// some iterations and in this case it will set this to false.
bool PeelProfiledIterations;
};
/// Get target-customized preferences for the generic loop peeling
/// transformation. The caller will initialize \p PP with the current
/// target-independent defaults with information from \p L and \p SE.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
/// @}
/// \name Scalar Target Information
@ -1282,6 +1290,8 @@ public:
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
@ -1560,6 +1570,10 @@ public:
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) override {
return Impl.getPeelingPreferences(L, SE, PP);
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {

View File

@ -150,6 +150,9 @@ public:
void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &) {}
void getPeelingPreferences(Loop *, ScalarEvolution &,
TTI::PeelingPreferences &) {}
bool isLegalAddImmediate(int64_t Imm) { return false; }
bool isLegalICmpImmediate(int64_t Imm) { return false; }

View File

@ -451,6 +451,14 @@ public:
UP.BEInsns = 2;
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
PP.PeelCount = 0;
PP.AllowPeeling = true;
PP.AllowLoopNestsPeeling = false;
PP.PeelProfiledIterations = true;
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,

View File

@ -94,6 +94,7 @@ bool UnrollRuntimeLoopRemainder(
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE);
bool canPeel(Loop *L);
@ -119,6 +120,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
unsigned MaxTripCount, bool MaxOrZero,
unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
bool &UseUpperBound);
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
@ -133,9 +136,13 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
Optional<bool> UserAllowProfileBasedPeeling,
Optional<unsigned> UserFullUnrollMaxCount);
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences
gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &TTI,
Optional<bool> UserAllowPeeling,
Optional<bool> UserAllowProfileBasedPeeling);
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,

View File

@ -327,6 +327,11 @@ void TargetTransformInfo::getUnrollingPreferences(
return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const {
return TTIImpl->getPeelingPreferences(L, SE, PP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}

View File

@ -859,6 +859,11 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
getFalkorUnrollingPreferences(L, SE, UP);
}
void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) {
switch (Inst->getIntrinsicID()) {

View File

@ -153,6 +153,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);

View File

@ -236,6 +236,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
}
void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
@ -990,6 +994,11 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
CommonTTI.getPeelingPreferences(L, SE, PP);
}
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
@ -1096,3 +1105,8 @@ void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
CommonTTI.getPeelingPreferences(L, SE, PP);
}

View File

@ -61,6 +61,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
};
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
@ -141,6 +144,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TTI::PSK_FastHardware;
@ -258,6 +264,8 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
unsigned getHardwareNumberOfRegisters(bool Vec) const;
unsigned getNumberOfRegisters(bool Vec) const;
unsigned getRegisterBitWidth(bool Vector) const;

View File

@ -1582,6 +1582,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
return ST->hasMVEIntegerOps();

View File

@ -251,6 +251,8 @@ public:
bool emitGetActiveLaneMask() const;
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool shouldBuildLookupTablesForConstant(Constant *C) const {
// In the ROPI and RWPI relocation models we can't have pointers to global
// variables or functions in constant data, so don't convert switches to

View File

@ -78,12 +78,17 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
}
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
// Only try to peel innermost loops with small runtime trip counts.
if (L && L->empty() && canPeel(L) &&
SE.getSmallConstantTripCount(L) == 0 &&
SE.getSmallConstantMaxTripCount(L) > 0 &&
SE.getSmallConstantMaxTripCount(L) <= 5) {
UP.PeelCount = 2;
PP.PeelCount = 2;
}
}

View File

@ -64,6 +64,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
/// Bias LSR towards creating post-increment opportunities.
bool shouldFavorPostInc() const;

View File

@ -155,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.Threshold / 4;
}
void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}

View File

@ -95,6 +95,10 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
// Volatile loads/stores are only supported for shared and global address
// spaces, or for generic AS that maps to them.

View File

@ -568,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getUnrollingPreferences(L, SE, UP);
}
void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other

View File

@ -66,6 +66,8 @@ public:
TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);

View File

@ -294,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) {

View File

@ -50,6 +50,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
/// @}

View File

@ -158,7 +158,8 @@ static bool computeUnrollAndJamCount(
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP) {
// First up use computeUnrollCount from the loop unroller to get a count
// for unrolling the outer loop, plus any loops requiring explicit
// unrolling we leave to the unroller. This uses UP.Threshold /
@ -168,7 +169,8 @@ static bool computeUnrollAndJamCount(
bool UseUpperBound = false;
bool ExplicitUnroll = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
/*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
/*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
UseUpperBound);
if (ExplicitUnroll || UseUpperBound) {
// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
// for the unroller instead.
@ -282,7 +284,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OptimizationRemarkEmitter &ORE, int OptLevel) {
TargetTransformInfo::UnrollingPreferences UP =
gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
None, None, None, None, None, None, None);
None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
@ -367,7 +371,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Decide if, and by how much, to unroll
bool IsCountSetExplicitly = computeUnrollAndJamCount(
L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
if (UP.Count <= 1)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.

View File

@ -193,9 +193,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
Optional<bool> UserAllowProfileBasedPeeling,
Optional<unsigned> UserFullUnrollMaxCount) {
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@ -206,7 +204,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
@ -218,10 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;
UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
@ -249,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollPeelCount.getNumOccurrences() > 0)
UP.PeelCount = UnrollPeelCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
if (UnrollAllowRemainder.getNumOccurrences() > 0)
@ -259,10 +251,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;
if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
@ -281,16 +269,39 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = *UserRuntime;
if (UserUpperBound.hasValue())
UP.UpperBound = *UserUpperBound;
if (UserAllowPeeling.hasValue())
UP.AllowPeeling = *UserAllowPeeling;
if (UserAllowProfileBasedPeeling.hasValue())
UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
if (UserFullUnrollMaxCount.hasValue())
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
}
TargetTransformInfo::PeelingPreferences
llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &TTI,
Optional<bool> UserAllowPeeling,
Optional<bool> UserAllowProfileBasedPeeling) {
TargetTransformInfo::PeelingPreferences PP;
// Get Target Specifc Values
TTI.getPeelingPreferences(L, SE, PP);
// User Specified Values using cl::opt
if (UnrollPeelCount.getNumOccurrences() > 0)
PP.PeelCount = UnrollPeelCount;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
PP.AllowPeeling = UnrollAllowPeeling;
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
// User Specifed values provided by argument
if (UserAllowPeeling.hasValue())
PP.AllowPeeling = *UserAllowPeeling;
if (UserAllowProfileBasedPeeling.hasValue())
PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
return PP;
}
namespace {
/// A struct to densely store the state of an instruction after unrolling at
@ -761,7 +772,8 @@ bool llvm::computeUnrollCount(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@ -863,8 +875,8 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
computePeelCount(L, LoopSize, UP, TripCount, SE);
if (UP.PeelCount) {
computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
@ -1067,8 +1079,9 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@ -1142,7 +1155,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
TripMultiple, LoopSize, UP, UseUpperBound);
TripMultiple, LoopSize, UP, PP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
@ -1157,7 +1170,7 @@ static LoopUnrollResult tryToUnrollLoop(
LoopUnrollResult UnrollResult = UnrollLoop(
L,
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder,
ForgetAllSCEV},
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
@ -1189,7 +1202,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
(IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
(IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;

View File

@ -279,19 +279,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the UP.PeelCount value set by the target in
// TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
unsigned TargetPeelCount = PP.PeelCount;
PP.PeelCount = 0;
if (!canPeel(L))
return;
// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
if (!UP.AllowLoopNestsPeeling && !L->empty())
if (!PP.AllowLoopNestsPeeling && !L->empty())
return;
// If the user provided a peel count, use that.
@ -299,13 +300,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (UserPeelCount) {
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
UP.PeelCount = UnrollForcePeelCount;
UP.PeelProfiledIterations = true;
PP.PeelCount = UnrollForcePeelCount;
PP.PeelProfiledIterations = true;
return;
}
// Skip peeling if it's disabled.
if (!UP.AllowPeeling)
if (!PP.AllowPeeling)
return;
unsigned AlreadyPeeled = 0;
@ -354,8 +355,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
<< " iteration(s) to turn"
<< " some Phis into invariants.\n");
UP.PeelCount = DesiredPeelCount;
UP.PeelProfiledIterations = false;
PP.PeelCount = DesiredPeelCount;
PP.PeelProfiledIterations = false;
return;
}
}
@ -367,7 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
// Do not apply profile base peeling if it is disabled.
if (!UP.PeelProfiledIterations)
if (!PP.PeelProfiledIterations)
return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
@ -387,7 +388,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
UP.PeelCount = *PeelCount;
PP.PeelCount = *PeelCount;
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");