mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[LPM] Port CGProfilePass from NPM to LPM
Reviewers: hans, chandlerc!, asbirlea, nikic

Reviewed By: hans, nikic

Subscribers: steven_wu, dexonsmith, nikic, echristo, void, zhizhouy, cfe-commits, aeubanks, MaskRay, jvesely, nhaehnle, hiraditya, kerbowa, llvm-commits

Tags: #llvm, #clang

Differential Revision: https://reviews.llvm.org/D83013
This commit is contained in:
parent
b81cf0114c
commit
ae2ba7e14e
@ -103,6 +103,7 @@ void initializeCFGViewerLegacyPassPass(PassRegistry&);
|
||||
void initializeCFIInstrInserterPass(PassRegistry&);
|
||||
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
|
||||
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
|
||||
void initializeCGProfileLegacyPassPass(PassRegistry &);
|
||||
void initializeCallGraphDOTPrinterPass(PassRegistry&);
|
||||
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
|
||||
void initializeCallGraphViewerPass(PassRegistry&);
|
||||
|
@ -282,6 +282,8 @@ ModulePass *createSampleProfileLoaderPass(StringRef Name);
|
||||
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
|
||||
raw_ostream *ThinLinkOS = nullptr);
|
||||
|
||||
ModulePass *createCGProfileLegacyPass();
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
@ -156,6 +156,7 @@ public:
|
||||
|
||||
bool DisableTailCalls;
|
||||
bool DisableUnrollLoops;
|
||||
bool CallGraphProfile;
|
||||
bool SLPVectorize;
|
||||
bool LoopVectorize;
|
||||
bool LoopsInterleaved;
|
||||
|
@ -19,11 +19,6 @@ namespace llvm {
|
||||
/// Module pass for the new pass manager. It mixes in PassInfoMixin so the
/// pass infrastructure can identify it by type.
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
public:
  /// Entry point invoked by the pass manager on module \p M, with \p AM
  /// giving access to module-level analyses.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
  /// Helper that records \p Counts (a map from a pair of functions to a
  /// 64-bit count) on \p M. It is const-qualified, so it does not modify the
  /// pass object itself.
  void addModuleFlags(
      Module &M,
      MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
};
|
||||
} // end namespace llvm
|
||||
|
||||
|
@ -248,10 +248,6 @@ static cl::opt<bool>
|
||||
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable control height reduction optimization (CHR)"));
|
||||
|
||||
static cl::opt<bool> EnableCallGraphProfile(
|
||||
"enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
|
||||
|
||||
/// Flag to enable inline deferral during PGO.
|
||||
static cl::opt<bool>
|
||||
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
|
||||
@ -267,7 +263,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
|
||||
Coroutines = false;
|
||||
LicmMssaOptCap = SetLicmMssaOptCap;
|
||||
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
|
||||
CallGraphProfile = EnableCallGraphProfile;
|
||||
CallGraphProfile = true;
|
||||
}
|
||||
|
||||
extern cl::opt<bool> EnableHotColdSplit;
|
||||
|
@ -195,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
|
||||
PrepareForThinLTO = EnablePrepareForThinLTO;
|
||||
PerformThinLTO = EnablePerformThinLTO;
|
||||
DivergentTarget = false;
|
||||
CallGraphProfile = true;
|
||||
}
|
||||
|
||||
PassManagerBuilder::~PassManagerBuilder() {
|
||||
@ -834,6 +835,10 @@ void PassManagerBuilder::populateModulePassManager(
|
||||
if (MergeFunctions)
|
||||
MPM.add(createMergeFunctionsPass());
|
||||
|
||||
// Add Module flag "CG Profile" based on Block Frequency Information.
|
||||
if (CallGraphProfile)
|
||||
MPM.add(createCGProfileLegacyPass());
|
||||
|
||||
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
|
||||
// canonicalization pass that enables other optimizations. As a result,
|
||||
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
|
||||
|
@ -10,22 +10,47 @@
|
||||
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/MDBuilder.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Instrumentation.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
static bool
|
||||
addModuleFlags(Module &M,
|
||||
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
|
||||
if (Counts.empty())
|
||||
return false;
|
||||
|
||||
LLVMContext &Context = M.getContext();
|
||||
MDBuilder MDB(Context);
|
||||
std::vector<Metadata *> Nodes;
|
||||
|
||||
for (auto E : Counts) {
|
||||
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
|
||||
ValueAsMetadata::get(E.first.second),
|
||||
MDB.createConstant(ConstantInt::get(
|
||||
Type::getInt64Ty(Context), E.second))};
|
||||
Nodes.push_back(MDNode::get(Context, Vals));
|
||||
}
|
||||
|
||||
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool runCGProfilePass(
|
||||
Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
|
||||
function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
|
||||
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
|
||||
FunctionAnalysisManager &FAM =
|
||||
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
|
||||
InstrProfSymtab Symtab;
|
||||
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
|
||||
Function *CalledF, uint64_t NewCount) {
|
||||
@ -35,14 +60,18 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
Count = SaturatingAdd(Count, NewCount);
|
||||
};
|
||||
// Ignore error here. Indirect calls are ignored if this fails.
|
||||
(void)(bool)Symtab.create(M);
|
||||
(void)(bool) Symtab.create(M);
|
||||
for (auto &F : M) {
|
||||
if (F.isDeclaration())
|
||||
// Avoid extra cost of running passes for BFI when the function doesn't have
|
||||
// entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check
|
||||
// if using LazyBlockFrequencyInfoPass.
|
||||
// TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
|
||||
if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
|
||||
continue;
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
|
||||
auto &BFI = GetBFI(F);
|
||||
if (BFI.getEntryFreq() == 0)
|
||||
continue;
|
||||
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
|
||||
TargetTransformInfo &TTI = GetTTI(F);
|
||||
for (auto &BB : F) {
|
||||
Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
|
||||
if (!BBCount)
|
||||
@ -69,28 +98,56 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
|
||||
}
|
||||
}
|
||||
|
||||
addModuleFlags(M, Counts);
|
||||
return addModuleFlags(M, Counts);
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct CGProfileLegacyPass final : public ModulePass {
|
||||
static char ID;
|
||||
CGProfileLegacyPass() : ModulePass(ID) {
|
||||
initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<LazyBlockFrequencyInfoPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
}
|
||||
|
||||
bool runOnModule(Module &M) override {
|
||||
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
|
||||
return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
|
||||
};
|
||||
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
|
||||
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
};
|
||||
|
||||
return runCGProfilePass(M, GetBFI, GetTTI, true);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
char CGProfileLegacyPass::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
|
||||
false)
|
||||
|
||||
/// Factory for the legacy-pass-manager version of the pass.
///
/// \returns a newly heap-allocated CGProfileLegacyPass, handed to the caller
/// as a raw ModulePass pointer.
ModulePass *llvm::createCGProfileLegacyPass() {
  ModulePass *LegacyPass = new CGProfileLegacyPass();
  return LegacyPass;
}
|
||||
|
||||
/// New pass manager entry point.
///
/// Fetches the function analysis manager for \p M through the module proxy,
/// wraps block-frequency and target-transform lookups in callbacks, and
/// delegates to runCGProfilePass. The boolean that helper returns is
/// ignored; all analyses are reported as preserved.
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
  auto &FAMProxy = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M);
  FunctionAnalysisManager &FAM = FAMProxy.getManager();

  auto BFIFor = [&FAM](Function &Fn) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };
  auto TTIFor = [&FAM](Function &Fn) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(Fn);
  };

  // Block frequencies come from BlockFrequencyAnalysis here, not from the
  // lazy legacy pass, so the LazyBFI flag is cleared.
  const bool UsesLazyBFI = false;
  runCGProfilePass(M, BFIFor, TTIFor, UsesLazyBFI);

  return PreservedAnalyses::all();
}
|
||||
|
||||
/// Record \p Counts on \p M as a "CG Profile" module flag.
///
/// Does nothing when \p Counts is empty. Otherwise each entry is encoded as
/// a metadata node holding the two functions of the key pair and the count
/// as an i64 constant. The nodes are then wrapped in one parent node, which
/// is added to the module with Module::Append behavior.
void CGProfilePass::addModuleFlags(
    Module &M,
    MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
  if (!Counts.empty()) {
    LLVMContext &Ctx = M.getContext();
    MDBuilder Builder(Ctx);
    auto *CountTy = Type::getInt64Ty(Ctx);

    std::vector<Metadata *> EdgeNodes;
    for (const auto &Entry : Counts) {
      Metadata *FirstFn = ValueAsMetadata::get(Entry.first.first);
      Metadata *SecondFn = ValueAsMetadata::get(Entry.first.second);
      Metadata *Weight =
          Builder.createConstant(ConstantInt::get(CountTy, Entry.second));

      Metadata *Operands[] = {FirstFn, SecondFn, Weight};
      EdgeNodes.push_back(MDNode::get(Ctx, Operands));
    }

    M.addModuleFlag(Module::Append, "CG Profile",
                    MDNode::get(Ctx, EdgeNodes));
  }
}
|
||||
|
@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
|
||||
initializePGOInstrumentationUseLegacyPassPass(Registry);
|
||||
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
|
||||
initializePGOMemOPSizeOptLegacyPassPass(Registry);
|
||||
initializeCGProfileLegacyPassPass(Registry);
|
||||
initializeInstrOrderFileLegacyPassPass(Registry);
|
||||
initializeInstrProfilingLegacyPassPass(Registry);
|
||||
initializeMemorySanitizerLegacyPassPass(Registry);
|
||||
|
@ -276,6 +276,12 @@
|
||||
; GCN-O1-NEXT: Warn about non-applied transformations
|
||||
; GCN-O1-NEXT: Alignment from assumptions
|
||||
; GCN-O1-NEXT: Strip Unused Function Prototypes
|
||||
; GCN-O1-NEXT: Call Graph Profile
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-NEXT: Natural Loop Information
|
||||
; GCN-O1-NEXT: Lazy Branch Probability Analysis
|
||||
; GCN-O1-NEXT: Lazy Block Frequency Analysis
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-NEXT: Natural Loop Information
|
||||
@ -623,6 +629,12 @@
|
||||
; GCN-O2-NEXT: Strip Unused Function Prototypes
|
||||
; GCN-O2-NEXT: Dead Global Elimination
|
||||
; GCN-O2-NEXT: Merge Duplicate Global Constants
|
||||
; GCN-O2-NEXT: Call Graph Profile
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Dominator Tree Construction
|
||||
; GCN-O2-NEXT: Natural Loop Information
|
||||
; GCN-O2-NEXT: Lazy Branch Probability Analysis
|
||||
; GCN-O2-NEXT: Lazy Block Frequency Analysis
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Dominator Tree Construction
|
||||
; GCN-O2-NEXT: Natural Loop Information
|
||||
@ -975,6 +987,12 @@
|
||||
; GCN-O3-NEXT: Strip Unused Function Prototypes
|
||||
; GCN-O3-NEXT: Dead Global Elimination
|
||||
; GCN-O3-NEXT: Merge Duplicate Global Constants
|
||||
; GCN-O3-NEXT: Call Graph Profile
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Dominator Tree Construction
|
||||
; GCN-O3-NEXT: Natural Loop Information
|
||||
; GCN-O3-NEXT: Lazy Branch Probability Analysis
|
||||
; GCN-O3-NEXT: Lazy Block Frequency Analysis
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Dominator Tree Construction
|
||||
; GCN-O3-NEXT: Natural Loop Information
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: opt < %s -passes cg-profile -S | FileCheck %s
|
||||
; RUN: opt < %s -cg-profile -S | FileCheck %s
|
||||
|
||||
declare void @b()
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
; RUN: opt -debug-pass-manager -passes='default<O2>' %s 2>&1 |FileCheck %s --check-prefixes=DEFAULT
|
||||
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF
|
||||
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON
|
||||
;
|
||||
; DEFAULT: Running pass: CGProfilePass
|
||||
; OFF-NOT: Running pass: CGProfilePass
|
||||
; ON: Running pass: CGProfilePass
|
||||
|
||||
define void @foo() {
|
||||
ret void
|
||||
}
|
@ -280,6 +280,12 @@
|
||||
; CHECK-NEXT: Strip Unused Function Prototypes
|
||||
; CHECK-NEXT: Dead Global Elimination
|
||||
; CHECK-NEXT: Merge Duplicate Global Constants
|
||||
; CHECK-NEXT: Call Graph Profile
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
|
@ -285,6 +285,12 @@
|
||||
; CHECK-NEXT: Strip Unused Function Prototypes
|
||||
; CHECK-NEXT: Dead Global Elimination
|
||||
; CHECK-NEXT: Merge Duplicate Global Constants
|
||||
; CHECK-NEXT: Call Graph Profile
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
|
@ -266,6 +266,12 @@
|
||||
; CHECK-NEXT: Strip Unused Function Prototypes
|
||||
; CHECK-NEXT: Dead Global Elimination
|
||||
; CHECK-NEXT: Merge Duplicate Global Constants
|
||||
; CHECK-NEXT: Call Graph Profile
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
|
Loading…
x
Reference in New Issue
Block a user