1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00

[LoopUnroll] Honor '#pragma unroll' even with -fno-unroll-loops.

When using clang with `-fno-unroll-loops` (implicitly added with `-O1`),
the LoopUnrollPass is not not added to the (legacy) pass pipeline. This
also means that it will not process any loop metadata such as
llvm.loop.unroll.enable (which is generated by #pragma unroll or
WarnMissedTransformationsPass emits a warning that a forced
transformation has not been applied (see
https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20181210/610833.html).
Such explicit transformations should take precedence over disabling
heuristics.

This patch unconditionally adds LoopUnrollPass to the optimizing
pipeline (that is, it is still not added with `-O0`), but passes a flag
indicating whether automatic unrolling is dis-/enabled. This is the same
approach as LoopVectorize uses.

The new pass manager's pipeline builder has no option to disable
unrolling, hence the problem does not apply.

Differential Revision: https://reviews.llvm.org/D55716

llvm-svn: 349509
This commit is contained in:
Michael Kruse 2018-12-18 17:16:05 +00:00
parent 7e697bf4c9
commit d632378f2f
5 changed files with 99 additions and 38 deletions

View File

@ -183,11 +183,12 @@ Pass *createLoopInstSimplifyPass();
//
// LoopUnroll - This pass is a simple loop unrolling pass.
//
Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
int Threshold = -1, int Count = -1,
int AllowPartial = -1, int Runtime = -1,
int UpperBound = -1, int AllowPeeling = -1);
// Create an unrolling pass for full unrolling that uses exact trip count only.
Pass *createSimpleLoopUnrollPass(int OptLevel = 2);
Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false);
//===----------------------------------------------------------------------===//
//

View File

@ -24,8 +24,14 @@ class LPMUpdater;
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
const int OptLevel;
/// If false, use a cost model to determine whether unrolling of a loop is
/// profitable. If true, only loops that explicitly request unrolling via
/// metadata are considered. All other loops are skipped.
const bool OnlyWhenForced;
public:
explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
: OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
@ -50,7 +56,13 @@ struct LoopUnrollOptions {
Optional<bool> AllowUpperBound;
int OptLevel;
LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {}
/// If false, use a cost model to determine whether unrolling of a loop is
/// profitable. If true, only loops that explicitly request unrolling via
/// metadata are considered. All other loops are skipped.
bool OnlyWhenForced;
LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
: OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
/// Enables or disables partial unrolling. When disabled only full unrolling
/// is allowed.

View File

@ -378,8 +378,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
MPM.add(createSimpleLoopUnrollPass(OptLevel,
DisableUnrollLoops)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
@ -682,16 +682,17 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_Peephole, MPM);
addInstructionCombiningPass(MPM);
if (EnableUnrollAndJam && !DisableUnrollLoops) {
// Unroll and Jam. We do this before unroll but need to be in a separate
// loop pass manager in order for the outer loop to be processed by
// unroll and jam before the inner loop is unrolled.
MPM.add(createLoopUnrollAndJamPass(OptLevel));
}
MPM.add(createLoopUnrollPass(OptLevel,
DisableUnrollLoops)); // Unroll small loops
if (!DisableUnrollLoops) {
if (EnableUnrollAndJam) {
// Unroll and Jam. We do this before unroll but need to be in a separate
// loop pass manager in order for the outer loop to be processed by
// unroll and jam before the inner loop is unrolled.
MPM.add(createLoopUnrollAndJamPass(OptLevel));
}
MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
// LoopUnroll may generate some redundency to cleanup.
addInstructionCombiningPass(MPM);
@ -700,7 +701,7 @@ void PassManagerBuilder::populateModulePassManager(
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass());
}
}
MPM.add(createWarnMissedTransformationsPass());
@ -872,12 +873,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
if (!DisableUnrollLoops)
PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
PM.add(createSimpleLoopUnrollPass(OptLevel,
DisableUnrollLoops)); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
PM.add(createLoopUnrollPass(OptLevel));
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
PM.add(createWarnMissedTransformationsPass());

View File

@ -965,13 +965,15 @@ static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
Optional<bool> ProvidedAllowPeeling) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
if (hasUnrollTransformation(L) & TM_Disable)
TransformationMode TM = hasUnrollTransformation(L);
if (TM & TM_Disable)
return LoopUnrollResult::Unmodified;
if (!L->isLoopSimplifyForm()) {
LLVM_DEBUG(
@ -979,6 +981,11 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
// When automtatic unrolling is disabled, do not unroll unless overridden for
// this loop.
if (OnlyWhenForced && !(TM & TM_Enable))
return LoopUnrollResult::Unmodified;
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
@ -1119,6 +1126,12 @@ public:
static char ID; // Pass ID, replacement for typeid
int OptLevel;
/// If false, use a cost model to determine whether unrolling of a loop is
/// profitable. If true, only loops that explicitly request unrolling via
/// metadata are considered. All other loops are skipped.
bool OnlyWhenForced;
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@ -1126,15 +1139,16 @@ public:
Optional<bool> ProvidedUpperBound;
Optional<bool> ProvidedAllowPeeling;
LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
Optional<bool> AllowPeeling = None)
: LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
ProvidedAllowPeeling(AllowPeeling) {
: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@ -1157,8 +1171,8 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound, ProvidedAllowPeeling);
if (Result == LoopUnrollResult::FullyUnrolled)
@ -1188,14 +1202,16 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
int AllowPartial, int Runtime, int UpperBound,
Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
int Threshold, int Count, int AllowPartial,
int Runtime, int UpperBound,
int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
return new LoopUnroll(
OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
OptLevel, OnlyWhenForced,
Threshold == -1 ? None : Optional<unsigned>(Threshold),
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
Runtime == -1 ? None : Optional<bool>(Runtime),
@ -1203,8 +1219,8 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@ -1234,7 +1250,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
/*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@ -1371,7 +1388,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
&L, DT, &LI, SE, TTI, AC, ORE,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
/*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;

View File

@ -0,0 +1,30 @@
; RUN: opt -disable-loop-unrolling -O1 -S < %s | FileCheck %s
;
; Check loop unrolling metadata is honored despite automatic unrolling
; being disabled in the pass builder.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @forced(
; CHECK: load
; CHECK: load
define void @forced(i32* nocapture %a) {
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret void
}
!0 = distinct !{!0, !{!"llvm.loop.unroll.enable"},
!{!"llvm.loop.unroll.count", i32 2}}