mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
Increases full-unroll threshold.
Summary: The default threshold for full unrolling is too conservative. This patch doubles the full-unroll threshold (from 150 to 300) when the optimization level is above 2; lower levels keep the previous value. This change will affect the following speccpu2006 benchmarks (performance numbers were collected on Intel Sandybridge): Performance: 403 0.11% 433 0.51% 445 0.48% 447 3.50% 453 1.49% 464 0.75% Code size: 403 0.56% 433 0.96% 445 2.16% 447 2.96% 453 0.94% 464 8.02% The compile-time overhead is similar to the code-size increase. Reviewers: davidxl, mkuper, mzolotukhin, hfinkel, chandlerc Reviewed By: hfinkel, chandlerc Subscribers: mehdi_amini, zzheng, efriedma, haicheng, hfinkel, llvm-commits Differential Revision: https://reviews.llvm.org/D28368 llvm-svn: 295538
This commit is contained in:
parent
458bba747c
commit
997f895ee0
@ -181,11 +181,11 @@ Pass *createLoopInstSimplifyPass();
|
||||
//
|
||||
// LoopUnroll - This pass is a simple loop unrolling pass.
|
||||
//
|
||||
Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
|
||||
Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
|
||||
int AllowPartial = -1, int Runtime = -1,
|
||||
int UpperBound = -1);
|
||||
// Create an unrolling pass for full unrolling that uses exact trip count only.
|
||||
Pass *createSimpleLoopUnrollPass();
|
||||
Pass *createSimpleLoopUnrollPass(int OptLevel);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -18,9 +18,10 @@ namespace llvm {
|
||||
|
||||
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
|
||||
const bool AllowPartialUnrolling;
|
||||
const int OptLevel;
|
||||
|
||||
explicit LoopUnrollPass(bool AllowPartialUnrolling)
|
||||
: AllowPartialUnrolling(AllowPartialUnrolling) {}
|
||||
explicit LoopUnrollPass(bool AllowPartialUnrolling, int OptLevel)
|
||||
: AllowPartialUnrolling(AllowPartialUnrolling), OptLevel(OptLevel) {}
|
||||
|
||||
public:
|
||||
/// Create an instance of the loop unroll pass that will support both full
|
||||
@ -28,16 +29,16 @@ public:
|
||||
///
|
||||
/// This uses the target information (or flags) to control the thresholds for
|
||||
/// different unrolling strategies but supports all of them.
|
||||
static LoopUnrollPass create() {
|
||||
return LoopUnrollPass(/*AllowPartialUnrolling*/ true);
|
||||
static LoopUnrollPass create(int OptLevel = 2) {
|
||||
return LoopUnrollPass(/*AllowPartialUnrolling*/ true, OptLevel);
|
||||
}
|
||||
|
||||
/// Create an instance of the loop unroll pass that only does full loop
|
||||
/// unrolling.
|
||||
///
|
||||
/// This will disable any runtime or partial unrolling.
|
||||
static LoopUnrollPass createFull() {
|
||||
return LoopUnrollPass(/*AllowPartialUnrolling*/ false);
|
||||
static LoopUnrollPass createFull(int OptLevel = 2) {
|
||||
return LoopUnrollPass(/*AllowPartialUnrolling*/ false, OptLevel);
|
||||
}
|
||||
|
||||
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
|
||||
|
@ -334,7 +334,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
|
||||
LPM2.addPass(IndVarSimplifyPass());
|
||||
LPM2.addPass(LoopIdiomRecognizePass());
|
||||
LPM2.addPass(LoopDeletionPass());
|
||||
LPM2.addPass(LoopUnrollPass::createFull());
|
||||
LPM2.addPass(LoopUnrollPass::createFull(Level));
|
||||
|
||||
// We provide the opt remark emitter pass for LICM to use. We only need to do
|
||||
// this once as it is immutable.
|
||||
@ -605,7 +605,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
|
||||
// FIXME: It would be really good to use a loop-integrated instruction
|
||||
// combiner for cleanup here so that the unrolling and LICM can be pipelined
|
||||
// across the loop nests.
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create()));
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
|
||||
OptimizePM.addPass(InstCombinePass());
|
||||
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
|
||||
|
@ -320,7 +320,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
|
||||
MPM.add(createCFGSimplificationPass());
|
||||
}
|
||||
if (!DisableUnrollLoops)
|
||||
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
|
||||
MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
|
||||
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
|
||||
|
||||
if (OptLevel > 1) {
|
||||
@ -366,7 +366,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
|
||||
|
||||
// BBVectorize may have significantly shortened a loop body; unroll again.
|
||||
if (!DisableUnrollLoops)
|
||||
MPM.add(createLoopUnrollPass());
|
||||
MPM.add(createLoopUnrollPass(OptLevel));
|
||||
}
|
||||
}
|
||||
|
||||
@ -612,7 +612,7 @@ void PassManagerBuilder::populateModulePassManager(
|
||||
|
||||
// BBVectorize may have significantly shortened a loop body; unroll again.
|
||||
if (!DisableUnrollLoops)
|
||||
MPM.add(createLoopUnrollPass());
|
||||
MPM.add(createLoopUnrollPass(OptLevel));
|
||||
}
|
||||
}
|
||||
|
||||
@ -621,7 +621,7 @@ void PassManagerBuilder::populateModulePassManager(
|
||||
addInstructionCombiningPass(MPM);
|
||||
|
||||
if (!DisableUnrollLoops) {
|
||||
MPM.add(createLoopUnrollPass()); // Unroll small loops
|
||||
MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
|
||||
|
||||
// LoopUnroll may generate some redundancy to clean up.
|
||||
addInstructionCombiningPass(MPM);
|
||||
@ -772,11 +772,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
|
||||
PM.add(createLoopInterchangePass());
|
||||
|
||||
if (!DisableUnrollLoops)
|
||||
PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
|
||||
PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
|
||||
PM.add(createLoopVectorizePass(true, LoopVectorize));
|
||||
// The vectorizer may have significantly shortened a loop body; unroll again.
|
||||
if (!DisableUnrollLoops)
|
||||
PM.add(createLoopUnrollPass());
|
||||
PM.add(createLoopUnrollPass(OptLevel));
|
||||
|
||||
// Now that we've optimized loops (in particular loop induction variables),
|
||||
// we may have exposed more scalar opportunities. Run parts of the scalar
|
||||
|
@ -131,13 +131,14 @@ static const unsigned NoThreshold = UINT_MAX;
|
||||
/// Gather the various unrolling parameters based on the defaults, compiler
|
||||
/// flags, TTI overrides and user specified parameters.
|
||||
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
||||
Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
|
||||
Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
|
||||
Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
|
||||
Loop *L, const TargetTransformInfo &TTI, int OptLevel,
|
||||
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
|
||||
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
|
||||
Optional<bool> UserUpperBound) {
|
||||
TargetTransformInfo::UnrollingPreferences UP;
|
||||
|
||||
// Set up the defaults
|
||||
UP.Threshold = 150;
|
||||
UP.Threshold = OptLevel > 2 ? 300 : 150;
|
||||
UP.MaxPercentThresholdBoost = 400;
|
||||
UP.OptSizeThreshold = 0;
|
||||
UP.PartialThreshold = 150;
|
||||
@ -927,7 +928,7 @@ static bool computeUnrollCount(
|
||||
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
ScalarEvolution *SE, const TargetTransformInfo &TTI,
|
||||
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
|
||||
bool PreserveLCSSA,
|
||||
bool PreserveLCSSA, int OptLevel,
|
||||
Optional<unsigned> ProvidedCount,
|
||||
Optional<unsigned> ProvidedThreshold,
|
||||
Optional<bool> ProvidedAllowPartial,
|
||||
@ -947,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
||||
bool NotDuplicatable;
|
||||
bool Convergent;
|
||||
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
|
||||
L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
|
||||
L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
|
||||
ProvidedRuntime, ProvidedUpperBound);
|
||||
// Exit early if unrolling is disabled.
|
||||
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
|
||||
@ -1047,16 +1048,17 @@ namespace {
|
||||
class LoopUnroll : public LoopPass {
|
||||
public:
|
||||
static char ID; // Pass ID, replacement for typeid
|
||||
LoopUnroll(Optional<unsigned> Threshold = None,
|
||||
LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
|
||||
Optional<unsigned> Count = None,
|
||||
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
|
||||
Optional<bool> UpperBound = None)
|
||||
: LoopPass(ID), ProvidedCount(std::move(Count)),
|
||||
: LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
|
||||
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
|
||||
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
|
||||
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
int OptLevel;
|
||||
Optional<unsigned> ProvidedCount;
|
||||
Optional<unsigned> ProvidedThreshold;
|
||||
Optional<bool> ProvidedAllowPartial;
|
||||
@ -1081,7 +1083,7 @@ public:
|
||||
OptimizationRemarkEmitter ORE(&F);
|
||||
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
|
||||
|
||||
return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
|
||||
return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
|
||||
ProvidedCount, ProvidedThreshold,
|
||||
ProvidedAllowPartial, ProvidedRuntime,
|
||||
ProvidedUpperBound);
|
||||
@ -1107,21 +1109,22 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
|
||||
|
||||
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
|
||||
int Runtime, int UpperBound) {
|
||||
Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
|
||||
int AllowPartial, int Runtime,
|
||||
int UpperBound) {
|
||||
// TODO: It would make more sense for this function to take the optionals
|
||||
// directly, but that's dangerous since it would silently break out of tree
|
||||
// callers.
|
||||
return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
|
||||
Count == -1 ? None : Optional<unsigned>(Count),
|
||||
AllowPartial == -1 ? None
|
||||
: Optional<bool>(AllowPartial),
|
||||
Runtime == -1 ? None : Optional<bool>(Runtime),
|
||||
UpperBound == -1 ? None : Optional<bool>(UpperBound));
|
||||
return new LoopUnroll(
|
||||
OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
|
||||
Count == -1 ? None : Optional<unsigned>(Count),
|
||||
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
|
||||
Runtime == -1 ? None : Optional<bool>(Runtime),
|
||||
UpperBound == -1 ? None : Optional<bool>(UpperBound));
|
||||
}
|
||||
|
||||
Pass *llvm::createSimpleLoopUnrollPass() {
|
||||
return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
|
||||
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
|
||||
return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
|
||||
}
|
||||
|
||||
PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
||||
@ -1153,10 +1156,10 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
||||
Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
|
||||
if (!AllowPartialUnrolling)
|
||||
AllowPartialParam = RuntimeParam = UpperBoundParam = false;
|
||||
bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
|
||||
/*PreserveLCSSA*/ true, /*Count*/ None,
|
||||
/*Threshold*/ None, AllowPartialParam,
|
||||
RuntimeParam, UpperBoundParam);
|
||||
bool Changed = tryToUnrollLoop(
|
||||
&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
|
||||
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
|
||||
/*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
|
@ -1,13 +1,14 @@
|
||||
; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
|
||||
; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
|
||||
; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
|
||||
; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
|
||||
; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
|
||||
; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
|
||||
; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
|
||||
; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
|
||||
; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
|
||||
; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
|
||||
; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
|
||||
; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
|
||||
; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
|
||||
|
||||
; This file tests the llvm.loop.vectorize.enable metadata forcing
|
||||
; vectorization even when optimization levels are too low, or when
|
||||
@ -25,6 +26,9 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
; O3-LABEL: @enabled(
|
||||
; O3: store <4 x i32>
|
||||
; O3: ret i32
|
||||
; O3DEFAULT-LABEL: @enabled(
|
||||
; O3DEFAULT: store <4 x i32>
|
||||
; O3DEFAULT: ret i32
|
||||
; Pragma always wins!
|
||||
; O3DIS-LABEL: @enabled(
|
||||
; O3DIS: store <4 x i32>
|
||||
@ -77,6 +81,9 @@ for.end: ; preds = %for.body
|
||||
; O3-LABEL: @nopragma(
|
||||
; O3: store <4 x i32>
|
||||
; O3: ret i32
|
||||
; O3DEFAULT-LABEL: @nopragma(
|
||||
; O3DEFAULT: store <4 x i32>
|
||||
; O3DEFAULT: ret i32
|
||||
; O3DIS-LABEL: @nopragma(
|
||||
; O3DIS-NOT: store <4 x i32>
|
||||
; O3DIS: ret i32
|
||||
@ -128,6 +135,9 @@ for.end: ; preds = %for.body
|
||||
; O3-LABEL: @disabled(
|
||||
; O3-NOT: store <4 x i32>
|
||||
; O3: ret i32
|
||||
; O3DEFAULT-LABEL: @disabled(
|
||||
; O3DEFAULT: store <4 x i32>
|
||||
; O3DEFAULT: ret i32
|
||||
; O3DIS-LABEL: @disabled(
|
||||
; O3DIS-NOT: store <4 x i32>
|
||||
; O3DIS: ret i32
|
||||
|
Loading…
Reference in New Issue
Block a user