[PassManager] add helper function to hold set of vector passes (2nd try)
This is closer to no-functional-change-intended than the 1st attempt. As noted in D102002, at least 2 diffs went unchecked in the pass manager regression tests: different pass parameters (SimplifyCFG) and an extension point/callback. Those should now be lifted correctly from the original code blocks.
This commit is contained in:
parent 45aaf991af
commit 971fe30acc
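
To make the shape of the refactor concrete before the diff, here is a minimal sketch (heavily condensed, not the actual helper; the real function is the full pass list added in lib/Passes/PassBuilder.cpp below): the passes shared by the default and LTO pipelines move into one member function, and a single IsLTO flag gates the few pipeline-specific pieces, such as the SimplifyCFG parameters that slipped through in the 1st attempt.

// Condensed sketch of the new helper; the complete pass list is in the diff.
void PassBuilder::addVectorPasses(OptimizationLevel Level,
                                  FunctionPassManager &FPM, bool IsLTO) {
  // Shared by both pipelines: run the loop vectorizer unconditionally.
  FPM.addPass(LoopVectorizePass(
      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
  // Pipeline-specific pieces are gated on the flag. For example, only the
  // full-LTO pipeline uses these SimplifyCFG parameters -- one of the two
  // diffs that went unchecked in the 1st attempt.
  if (IsLTO)
    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
}

// Both call sites then replace their duplicated inline blocks:
//   addVectorPasses(Level, OptimizePM, /* IsLTO */ false); // default pipeline
//   addVectorPasses(Level, MainFPM, /* IsLTO */ true);     // LTO pipeline

The legacy PassManagerBuilder gets the same treatment with an overload that takes a legacy::PassManagerBase.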
include/llvm/Passes/PassBuilder.h
@@ -709,6 +709,9 @@ private:
   void addRequiredLTOPreLinkPasses(ModulePassManager &MPM);
 
+  void addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM,
+                       bool IsLTO);
+
   static Optional<std::vector<PipelineElement>>
   parsePipelineText(StringRef Text);
 
include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -218,7 +218,8 @@ private:
   void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
   void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
   void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
+  void addVectorPasses(legacy::PassManagerBase &PM, bool IsLTO);
 
 public:
   /// populateFunctionPassManager - This fills in the function pass manager,
   /// which is expected to be run on each function immediately as it is
lib/Passes/PassBuilder.cpp
@@ -1201,6 +1201,127 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   return MPM;
 }
 
+/// TODO: Should LTO cause any differences to this set of passes?
+void PassBuilder::addVectorPasses(OptimizationLevel Level,
+                                  FunctionPassManager &FPM, bool IsLTO) {
+  FPM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+  if (IsLTO) {
+    // The vectorizer may have significantly shortened a loop body; unroll
+    // again. Unroll small loops to hide loop backedge latency and saturate any
+    // parallel execution resources of an out-of-order processor. We also then
+    // need to clean up redundancies and loop invariant code.
+    // FIXME: It would be really good to use a loop-integrated instruction
+    // combiner for cleanup here so that the unrolling and LICM can be pipelined
+    // across the loop nests.
+    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+    if (EnableUnrollAndJam && PTO.LoopUnrolling)
+      FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
+    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+        PTO.ForgetAllSCEVInLoopUnroll)));
+    FPM.addPass(WarnMissedTransformationsPass());
+  }
+
+  if (!IsLTO) {
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    FPM.addPass(LoopLoadEliminationPass());
+  }
+  // Cleanup after the loop optimization passes.
+  FPM.addPass(InstCombinePass());
+
+  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    FPM.addPass(EarlyCSEPass());
+    FPM.addPass(CorrelatedValuePropagationPass());
+    FPM.addPass(InstCombinePass());
+    LoopPassManager LPM;
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+                                       OptimizationLevel::O3));
+    FPM.addPass(
+        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    FPM.addPass(createFunctionToLoopPassAdaptor(
+        std::move(LPM), EnableMSSALoopDependency,
+        /*UseBlockFrequencyInfo=*/true));
+    FPM.addPass(SimplifyCFGPass());
+    FPM.addPass(InstCombinePass());
+  }
+
+  if (IsLTO) {
+    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+  } else {
+    // Now that we've formed fast to execute loop structures, we do further
+    // optimizations. These are run afterward as they might block doing complex
+    // analyses and transforms such as what are needed for loop vectorization.
+
+    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+    // GVN, loop transforms, and others have already run, so it's now better to
+    // convert to more optimized IR using more aggressive simplify CFG options.
+    // The extra sinking transform can create larger basic blocks, so do this
+    // before SLP vectorization.
+    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                    .forwardSwitchCondToPhi(true)
+                                    .convertSwitchToLookupTable(true)
+                                    .needCanonicalLoops(false)
+                                    .hoistCommonInsts(true)
+                                    .sinkCommonInsts(true)));
+  }
+  if (IsLTO) {
+    FPM.addPass(SCCPPass());
+    FPM.addPass(InstCombinePass());
+    FPM.addPass(BDCEPass());
+  }
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  if (PTO.SLPVectorization) {
+    FPM.addPass(SLPVectorizerPass());
+    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+      FPM.addPass(EarlyCSEPass());
+    }
+  }
+  // Enhance/cleanup vector code.
+  FPM.addPass(VectorCombinePass());
+
+  if (!IsLTO) {
+    FPM.addPass(InstCombinePass());
+    // Unroll small loops to hide loop backedge latency and saturate any
+    // parallel execution resources of an out-of-order processor. We also then
+    // need to clean up redundancies and loop invariant code.
+    // FIXME: It would be really good to use a loop-integrated instruction
+    // combiner for cleanup here so that the unrolling and LICM can be pipelined
+    // across the loop nests.
+    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
+      FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
+    }
+    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+        PTO.ForgetAllSCEVInLoopUnroll)));
+    FPM.addPass(WarnMissedTransformationsPass());
+    FPM.addPass(InstCombinePass());
+    FPM.addPass(
+        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    FPM.addPass(createFunctionToLoopPassAdaptor(
+        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+        EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
+  }
+
+  // Now that we've vectorized and unrolled loops, we may have more refined
+  // alignment information, try to re-derive it here.
+  FPM.addPass(AlignmentFromAssumptionsPass());
+
+  if (IsLTO)
+    FPM.addPass(InstCombinePass());
+}
+
 ModulePassManager
 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                              bool LTOPreLink) {
@@ -1295,91 +1416,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   // from the TargetLibraryInfo.
   OptimizePM.addPass(InjectTLIMappings());
 
-  // Now run the core loop vectorizer.
-  OptimizePM.addPass(LoopVectorizePass(
-      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-
-  // Eliminate loads by forwarding stores from the previous iteration to loads
-  // of the current iteration.
-  OptimizePM.addPass(LoopLoadEliminationPass());
-
-  // Cleanup after the loop optimization passes.
-  OptimizePM.addPass(InstCombinePass());
-
-  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correlated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    OptimizePM.addPass(EarlyCSEPass());
-    OptimizePM.addPass(CorrelatedValuePropagationPass());
-    OptimizePM.addPass(InstCombinePass());
-    LoopPassManager LPM;
-    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
-                                       OptimizationLevel::O3));
-    OptimizePM.addPass(
-        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
-        std::move(LPM), EnableMSSALoopDependency,
-        /*UseBlockFrequencyInfo=*/true));
-    OptimizePM.addPass(SimplifyCFGPass());
-    OptimizePM.addPass(InstCombinePass());
-  }
-
-  // Now that we've formed fast to execute loop structures, we do further
-  // optimizations. These are run afterward as they might block doing complex
-  // analyses and transforms such as what are needed for loop vectorization.
-
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
-                                         .forwardSwitchCondToPhi(true)
-                                         .convertSwitchToLookupTable(true)
-                                         .needCanonicalLoops(false)
-                                         .hoistCommonInsts(true)
-                                         .sinkCommonInsts(true)));
-
-  // Optimize parallel scalar instruction chains into SIMD instructions.
-  if (PTO.SLPVectorization) {
-    OptimizePM.addPass(SLPVectorizerPass());
-    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
-      OptimizePM.addPass(EarlyCSEPass());
-    }
-  }
-
-  // Enhance/cleanup vector code.
-  OptimizePM.addPass(VectorCombinePass());
-  OptimizePM.addPass(InstCombinePass());
-
-  // Unroll small loops to hide loop backedge latency and saturate any parallel
-  // execution resources of an out-of-order processor. We also then need to
-  // clean up redundancies and loop invariant code.
-  // FIXME: It would be really good to use a loop-integrated instruction
-  // combiner for cleanup here so that the unrolling and LICM can be pipelined
-  // across the loop nests.
-  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
-  if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-    OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
-  }
-  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
-      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-      PTO.ForgetAllSCEVInLoopUnroll)));
-  OptimizePM.addPass(WarnMissedTransformationsPass());
-  OptimizePM.addPass(InstCombinePass());
-  OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
-      EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
-  // Now that we've vectorized and unrolled loops, we may have more refined
-  // alignment information, try to re-derive it here.
-  OptimizePM.addPass(AlignmentFromAssumptionsPass());
+  addVectorPasses(Level, OptimizePM, /* IsLTO */ false);
 
   // Split out cold code. Splitting is done late to avoid hiding context from
   // other optimizations and inadvertently regressing performance. The tradeoff
@@ -1825,39 +1862,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
 
   MainFPM.addPass(LoopDistributePass());
-  MainFPM.addPass(LoopVectorizePass(
-      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-  // The vectorizer may have significantly shortened a loop body; unroll again.
-  MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions(
-      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-      PTO.ForgetAllSCEVInLoopUnroll)));
-
-  MainFPM.addPass(WarnMissedTransformationsPass());
-
-  MainFPM.addPass(InstCombinePass());
-  MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
-  MainFPM.addPass(SCCPPass());
-  MainFPM.addPass(InstCombinePass());
-  MainFPM.addPass(BDCEPass());
-
-  // More scalar chains could be vectorized due to more alias information
-  if (PTO.SLPVectorization) {
-    MainFPM.addPass(SLPVectorizerPass());
-    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
-      MainFPM.addPass(EarlyCSEPass());
-    }
-  }
-
-  MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization.
-
-  // After vectorization, assume intrinsics may tell us more about pointer
-  // alignments.
-  MainFPM.addPass(AlignmentFromAssumptionsPass());
-
-  // FIXME: Conditionally run LoadCombine here, after it's ported
-  // (in case we still have this pass, given its questionable usefulness).
-
-  MainFPM.addPass(InstCombinePass());
+
+  addVectorPasses(Level, MainFPM, /* IsLTO */ true);
+
   invokePeepholeEPCallbacks(MainFPM, Level);
   MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -523,6 +523,124 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createControlHeightReductionLegacyPass());
 }
 
+/// FIXME: Should LTO cause any differences to this set of passes?
+void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
+                                         bool IsLTO) {
+  PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+  if (IsLTO) {
+    // The vectorizer may have significantly shortened a loop body; unroll
+    // again. Unroll small loops to hide loop backedge latency and saturate any
+    // parallel execution resources of an out-of-order processor. We also then
+    // need to clean up redundancies and loop invariant code.
+    // FIXME: It would be really good to use a loop-integrated instruction
+    // combiner for cleanup here so that the unrolling and LICM can be pipelined
+    // across the loop nests.
+    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+    if (EnableUnrollAndJam && !DisableUnrollLoops)
+      PM.add(createLoopUnrollAndJamPass(OptLevel));
+    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                ForgetAllSCEVInLoopUnroll));
+    PM.add(createWarnMissedTransformationsPass());
+  }
+
+  if (!IsLTO) {
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    PM.add(createLoopLoadEliminationPass());
+  }
+  // Cleanup after the loop optimization passes.
+  PM.add(createInstructionCombiningPass());
+
+  if (OptLevel > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    PM.add(createEarlyCSEPass());
+    PM.add(createCorrelatedValuePropagationPass());
+    PM.add(createInstructionCombiningPass());
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+    PM.add(createCFGSimplificationPass());
+    PM.add(createInstructionCombiningPass());
+  }
+
+  if (IsLTO) {
+    PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
+                                           .hoistCommonInsts(true)));
+  } else {
+    // Now that we've formed fast to execute loop structures, we do further
+    // optimizations. These are run afterward as they might block doing complex
+    // analyses and transforms such as what are needed for loop vectorization.
+
+    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+    // GVN, loop transforms, and others have already run, so it's now better to
+    // convert to more optimized IR using more aggressive simplify CFG options.
+    // The extra sinking transform can create larger basic blocks, so do this
+    // before SLP vectorization.
+    PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+                                           .forwardSwitchCondToPhi(true)
+                                           .convertSwitchToLookupTable(true)
+                                           .needCanonicalLoops(false)
+                                           .hoistCommonInsts(true)
+                                           .sinkCommonInsts(true)));
+  }
+  if (IsLTO) {
+    PM.add(createSCCPPass()); // Propagate exposed constants
+    PM.add(createInstructionCombiningPass()); // Clean up again
+    PM.add(createBitTrackingDCEPass());
+  }
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  if (SLPVectorize) {
+    PM.add(createSLPVectorizerPass());
+    if (OptLevel > 1 && ExtraVectorizerPasses)
+      PM.add(createEarlyCSEPass());
+  }
+
+  // Enhance/cleanup vector code.
+  PM.add(createVectorCombinePass());
+
+  if (!IsLTO) {
+    addExtensionsToPM(EP_Peephole, PM);
+    PM.add(createInstructionCombiningPass());
+
+    if (EnableUnrollAndJam && !DisableUnrollLoops) {
+      // Unroll and Jam. We do this before unroll but need to be in a separate
+      // loop pass manager in order for the outer loop to be processed by
+      // unroll and jam before the inner loop is unrolled.
+      PM.add(createLoopUnrollAndJamPass(OptLevel));
+    }
+
+    // Unroll small loops
+    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                ForgetAllSCEVInLoopUnroll));
+
+    if (!DisableUnrollLoops) {
+      // LoopUnroll may generate some redundency to cleanup.
+      PM.add(createInstructionCombiningPass());
+
+      // Runtime unrolling will introduce runtime check in loop prologue. If the
+      // unrolled loop is a inner loop, then the prologue will be inside the
+      // outer loop. LICM pass can help to promote the runtime check out if the
+      // checked value is loop invariant.
+      PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    }
+
+    PM.add(createWarnMissedTransformationsPass());
+  }
+
+  // After vectorization and unrolling, assume intrinsics may tell us more
+  // about pointer alignments.
+  PM.add(createAlignmentFromAssumptionsPass());
+
+  if (IsLTO)
+    PM.add(createInstructionCombiningPass());
+}
+
 void PassManagerBuilder::populateModulePassManager(
     legacy::PassManagerBase &MPM) {
   // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
@@ -794,86 +912,7 @@ void PassManagerBuilder::populateModulePassManager(
   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
   MPM.add(createLoopDistributePass());
 
-  MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
-  // Eliminate loads by forwarding stores from the previous iteration to loads
-  // of the current iteration.
-  MPM.add(createLoopLoadEliminationPass());
-
-  // FIXME: Because of #pragma vectorize enable, the passes below are always
-  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
-  // on -O1 and no #pragma is found). Would be good to have these two passes
-  // as function calls, so that we can only pass them when the vectorizer
-  // changed the code.
-  MPM.add(createInstructionCombiningPass());
-  if (OptLevel > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correllated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    MPM.add(createEarlyCSEPass());
-    MPM.add(createCorrelatedValuePropagationPass());
-    MPM.add(createInstructionCombiningPass());
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    MPM.add(createCFGSimplificationPass());
-    MPM.add(createInstructionCombiningPass());
-  }
-
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
-                                          .forwardSwitchCondToPhi(true)
-                                          .convertSwitchToLookupTable(true)
-                                          .needCanonicalLoops(false)
-                                          .hoistCommonInsts(true)
-                                          .sinkCommonInsts(true)));
-
-  if (SLPVectorize) {
-    MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-    if (OptLevel > 1 && ExtraVectorizerPasses) {
-      MPM.add(createEarlyCSEPass());
-    }
-  }
-
-  // Enhance/cleanup vector code.
-  MPM.add(createVectorCombinePass());
-
-  addExtensionsToPM(EP_Peephole, MPM);
-  MPM.add(createInstructionCombiningPass());
-
-  if (EnableUnrollAndJam && !DisableUnrollLoops) {
-    // Unroll and Jam. We do this before unroll but need to be in a separate
-    // loop pass manager in order for the outer loop to be processed by
-    // unroll and jam before the inner loop is unrolled.
-    MPM.add(createLoopUnrollAndJamPass(OptLevel));
-  }
-
-  // Unroll small loops
-  MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
-                               ForgetAllSCEVInLoopUnroll));
-
-  if (!DisableUnrollLoops) {
-    // LoopUnroll may generate some redundency to cleanup.
-    MPM.add(createInstructionCombiningPass());
-
-    // Runtime unrolling will introduce runtime check in loop prologue. If the
-    // unrolled loop is a inner loop, then the prologue will be inside the
-    // outer loop. LICM pass can help to promote the runtime check out if the
-    // checked value is loop invariant.
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-  }
-
-  MPM.add(createWarnMissedTransformationsPass());
-
-  // After vectorization and unrolling, assume intrinsics may tell us more
-  // about pointer alignments.
-  MPM.add(createAlignmentFromAssumptionsPass());
+  addVectorPasses(MPM, /* IsLTO */ false);
 
   // FIXME: We shouldn't bother with this anymore.
   MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -1083,35 +1122,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                     ForgetAllSCEVInLoopUnroll));
   PM.add(createLoopDistributePass());
-  PM.add(createLoopVectorizePass(true, !LoopVectorize));
-  // The vectorizer may have significantly shortened a loop body; unroll again.
-  PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
-                              ForgetAllSCEVInLoopUnroll));
-
-  PM.add(createWarnMissedTransformationsPass());
-
-  // Now that we've optimized loops (in particular loop induction variables),
-  // we may have exposed more scalar opportunities. Run parts of the scalar
-  // optimizer again at this point.
-  PM.add(createInstructionCombiningPass()); // Initial cleanup
-  PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
-                                         .hoistCommonInsts(true)));
-  PM.add(createSCCPPass()); // Propagate exposed constants
-  PM.add(createInstructionCombiningPass()); // Clean up again
-  PM.add(createBitTrackingDCEPass());
-
-  // More scalar chains could be vectorized due to more alias information
-  if (SLPVectorize)
-    PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
-  PM.add(createVectorCombinePass()); // Clean up partial vectorization.
-
-  // After vectorization, assume intrinsics may tell us more about pointer
-  // alignments.
-  PM.add(createAlignmentFromAssumptionsPass());
-
-  // Cleanup and simplify the code after the scalar optimizations.
-  PM.add(createInstructionCombiningPass());
+
+  addVectorPasses(PM, /* IsLTO */ true);
+
   addExtensionsToPM(EP_Peephole, PM);
 
   PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));