1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[HotColdSplit] Schedule splitting late to fix perf regression

With or without PGO data applied, splitting early in the pipeline
(either before the inliner or shortly after it) regresses performance
across SPEC variants. The cause appears to be that splitting hides
context from subsequent optimizations.

Schedule splitting late again, in effect reversing r352080, which
scheduled the splitting pass early for code size benefits (documented in
https://reviews.llvm.org/D57082).

Differential Revision: https://reviews.llvm.org/D58258

llvm-svn: 354158
This commit is contained in:
Vedant Kumar 2019-02-15 18:46:44 +00:00
parent 665ec71dc0
commit 8f94c48cae
7 changed files with 43 additions and 36 deletions

View File

@ -274,7 +274,8 @@ public:
/// require some transformations for semantic reasons, they should explicitly /// require some transformations for semantic reasons, they should explicitly
/// build them. /// build them.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
bool DebugLogging = false); bool DebugLogging = false,
bool LTOPreLink = false);
/// Build a per-module default optimization pipeline. /// Build a per-module default optimization pipeline.
/// ///
@ -288,7 +289,8 @@ public:
/// require some transformations for semantic reasons, they should explicitly /// require some transformations for semantic reasons, they should explicitly
/// build them. /// build them.
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool DebugLogging = false); bool DebugLogging = false,
bool LTOPreLink = false);
/// Build a pre-link, ThinLTO-targeting default optimization pipeline to /// Build a pre-link, ThinLTO-targeting default optimization pipeline to
/// a pass manager. /// a pass manager.

View File

@ -703,14 +703,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnableSyntheticCounts && !PGOOpt) if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation()); MPM.addPass(SyntheticCountsPropagation());
// Split out cold code. Splitting is done before inlining because 1) the most
// common kinds of cold regions can (a) be found before inlining and (b) do
// not grow after inlining, and 2) inhibiting inlining of cold code improves
// code size & compile time. Split after Mem2Reg to make code model estimates
// more accurate, but before InstCombine to allow it to clean things up.
if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink)
MPM.addPass(HotColdSplittingPass());
// Require the GlobalsAA analysis for the module so we can query it within // Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline. // the CGSCC pipeline.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
@ -769,9 +761,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
return MPM; return MPM;
} }
ModulePassManager ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
bool DebugLogging) {
ModulePassManager MPM(DebugLogging); ModulePassManager MPM(DebugLogging);
// Optimize globals now that the module is fully simplified. // Optimize globals now that the module is fully simplified.
@ -880,6 +871,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// alignment information, try to re-derive it here. // alignment information, try to re-derive it here.
OptimizePM.addPass(AlignmentFromAssumptionsPass()); OptimizePM.addPass(AlignmentFromAssumptionsPass());
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
// is that this has a higher code size cost than splitting early.
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
// LoopSink pass sinks instructions hoisted by LICM, which serves as a // LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result, // canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@ -923,7 +920,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
ModulePassManager ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) { bool DebugLogging, bool LTOPreLink) {
assert(Level != O0 && "Must request optimizations for the default pipeline!"); assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging); ModulePassManager MPM(DebugLogging);
@ -943,7 +940,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
DebugLogging)); DebugLogging));
// Now add the optimization pipeline. // Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink));
return MPM; return MPM;
} }
@ -1027,7 +1024,8 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) { bool DebugLogging) {
assert(Level != O0 && "Must request optimizations for the default pipeline!"); assert(Level != O0 && "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline! // FIXME: We should use a customized pre-link pipeline!
return buildPerModuleDefaultPipeline(Level, DebugLogging); return buildPerModuleDefaultPipeline(Level, DebugLogging,
/*LTOPreLink=*/true);
} }
ModulePassManager ModulePassManager
@ -1208,6 +1206,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// CFI is disabled. // CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Enable splitting late in the FullLTO post-link pipeline. This is done at
// the same stage as in the old pass manager
// (\ref addLateLTOOptimizationPasses).
if (EnableHotColdSplit)
MPM.addPass(HotColdSplittingPass());
// Add late LTO optimization passes. // Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed. // Delete basic blocks, which optimization passes may have killed.
MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));

View File

@ -529,11 +529,6 @@ void PassManagerBuilder::populateModulePassManager(
if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM); addPGOInstrPasses(MPM);
// Split out cold code before inlining. See comment in the new PM
// (\ref buildModuleSimplificationPipeline).
if (EnableHotColdSplit && DefaultOrPreLinkPipeline)
MPM.add(createHotColdSplittingPass());
// We add a module alias analysis pass here. In part due to bugs in the // We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive // analysis infrastructure this "works" in that the analysis stays alive
// for the entire SCC pass run below. // for the entire SCC pass run below.
@ -730,6 +725,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createConstantMergePass()); // Merge dup global constants MPM.add(createConstantMergePass()); // Merge dup global constants
} }
// See comment in the new PM for justification of scheduling splitting at
// this stage (\ref buildModuleOptimizationPipeline).
if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
MPM.add(createHotColdSplittingPass());
if (MergeFunctions) if (MergeFunctions)
MPM.add(createMergeFunctionsPass()); MPM.add(createMergeFunctionsPass());
@ -918,6 +918,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
void PassManagerBuilder::addLateLTOOptimizationPasses( void PassManagerBuilder::addLateLTOOptimizationPasses(
legacy::PassManagerBase &PM) { legacy::PassManagerBase &PM) {
// See comment in the new PM for justification of scheduling splitting at
// this stage (\ref buildLTODefaultPipeline).
if (EnableHotColdSplit)
PM.add(createHotColdSplittingPass());
// Delete basic blocks, which optimization passes may have killed. // Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass()); PM.add(createCFGSimplificationPass());

View File

@ -1,10 +1,10 @@
; RUN: opt -module-summary %s -o %t.bc ; RUN: opt -module-summary %s -o %t.bc
; RUN: llvm-lto -hot-cold-split=true -thinlto-action=run %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-THINLTO-POSTLINK-Os ; RUN: llvm-lto -hot-cold-split=true -thinlto-action=run %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-ANYLTO-POSTLINK-Os
; RUN: llvm-lto -hot-cold-split=true %t.bc -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=OLDPM-ANYLTO-POSTLINK-Os
; REQUIRES: asserts ; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
; OLDPM-THINLTO-POSTLINK-Os-LABEL: Pass Arguments ; OLDPM-ANYLTO-POSTLINK-Os: Hot Cold Splitting
; OLDPM-THINLTO-POSTLINK-Os-NOT: Hot Cold Splitting

View File

@ -13,7 +13,6 @@
; GEN: Running pass: PGOInstrumentationGen ; GEN: Running pass: PGOInstrumentationGen
; USE: Running pass: PGOInstrumentationUse ; USE: Running pass: PGOInstrumentationUse
; USE: Running pass: PGOIndirectCallPromotion ; USE: Running pass: PGOIndirectCallPromotion
; SPLIT: Running pass: HotColdSplittingPass
; USE: Running pass: PGOMemOPSizeOpt ; USE: Running pass: PGOMemOPSizeOpt
; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
@ -27,6 +26,7 @@
; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass ; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass
; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion ; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion
; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
; SPLIT: Running pass: HotColdSplittingPass
define void @foo() { define void @foo() {
ret void ret void

View File

@ -1,22 +1,19 @@
; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=DEFAULT-Os ; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=DEFAULT-Os
; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='lto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os ; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='lto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os
; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os ; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os
; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='lto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-POSTLINK-Os
; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-POSTLINK-Os ; RUN: opt -mtriple=x86_64-- -Os -hot-cold-split=true -passes='thinlto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-POSTLINK-Os
; REQUIRES: asserts ; REQUIRES: asserts
; Splitting should occur after Mem2Reg and should be followed by InstCombine. ; Splitting should occur late.
; DEFAULT-Os: Promote Memory to Register
; DEFAULT-Os: Hot Cold Splitting ; DEFAULT-Os: Hot Cold Splitting
; DEFAULT-Os: Combine redundant instructions ; DEFAULT-Os: Simplify the CFG
; LTO-PRELINK-Os-LABEL: Starting llvm::Module pass manager run. ; LTO-PRELINK-Os-NOT: pass: HotColdSplittingPass
; LTO-PRELINK-Os: Running pass: {{.*}}PromotePass
; LTO-PRELINK-Os: Running pass: HotColdSplittingPass
; THINLTO-PRELINK-Os-LABEL: Running analysis: PassInstrumentationAnalysis ; THINLTO-PRELINK-Os-NOT: Running pass: HotColdSplittingPass
; THINLTO-PRELINK-Os: Running pass: {{.*}}PromotePass
; THINLTO-PRELINK-Os: Running pass: HotColdSplittingPass
; THINLTO-POSTLINK-Os-NOT: HotColdSplitting ; LTO-POSTLINK-Os: HotColdSplitting
; THINLTO-POSTLINK-Os: HotColdSplitting

View File

@ -41,7 +41,6 @@
; PGOUSE: Function Integration/Inlining ; PGOUSE: Function Integration/Inlining
; PGOUSE: PGOInstrumentationUsePass ; PGOUSE: PGOInstrumentationUsePass
; PGOUSE: PGOIndirectCallPromotion ; PGOUSE: PGOIndirectCallPromotion
; SPLIT: Hot Cold Splitting
; PGOUSE: CallGraph Construction ; PGOUSE: CallGraph Construction
; CHECK-O2-NEXT: Globals Alias Analysis ; CHECK-O2-NEXT: Globals Alias Analysis
; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Call Graph SCC Pass Manager
@ -100,6 +99,7 @@
; the runtime unrolling though. ; the runtime unrolling though.
; CHECK-O2: Loop Pass Manager ; CHECK-O2: Loop Pass Manager
; CHECK-O2-NEXT: Loop Invariant Code Motion ; CHECK-O2-NEXT: Loop Invariant Code Motion
; SPLIT: Hot Cold Splitting
; CHECK-O2: FunctionPass Manager ; CHECK-O2: FunctionPass Manager
; CHECK-O2: Loop Pass Manager ; CHECK-O2: Loop Pass Manager
; CHECK-O2-NEXT: Loop Sink ; CHECK-O2-NEXT: Loop Sink