1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[LPM] Port CGProfilePass from NPM to LPM

Reviewers: hans, chandlerc!, asbirlea, nikic

Reviewed By: hans, nikic

Subscribers: steven_wu, dexonsmith, nikic, echristo, void, zhizhouy, cfe-commits, aeubanks, MaskRay, jvesely, nhaehnle, hiraditya, kerbowa, llvm-commits

Tags: #llvm, #clang

Differential Revision: https://reviews.llvm.org/D83013
This commit is contained in:
Zequan Wu 2020-07-08 12:30:28 -07:00
parent b81cf0114c
commit ae2ba7e14e
14 changed files with 134 additions and 50 deletions

View File

@ -103,6 +103,7 @@ void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
void initializeCGProfileLegacyPassPass(PassRegistry &);
void initializeCallGraphDOTPrinterPass(PassRegistry&);
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
void initializeCallGraphViewerPass(PassRegistry&);

View File

@ -282,6 +282,8 @@ ModulePass *createSampleProfileLoaderPass(StringRef Name);
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
raw_ostream *ThinLinkOS = nullptr);
ModulePass *createCGProfileLegacyPass();
} // End llvm namespace
#endif

View File

@ -156,6 +156,7 @@ public:
bool DisableTailCalls;
bool DisableUnrollLoops;
bool CallGraphProfile;
bool SLPVectorize;
bool LoopVectorize;
bool LoopsInterleaved;

View File

@ -19,11 +19,6 @@ namespace llvm {
/// Module-level pass for the new pass manager (it derives from PassInfoMixin
/// and exposes the run(Module &, ModuleAnalysisManager &) entry point).
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
public:
/// Runs the pass over \p M and returns the set of preserved analyses.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
/// Helper that takes the per-function-pair counts in \p Counts and records
/// them on \p M; its definition appends them as the "CG Profile" module flag.
void addModuleFlags(
Module &M,
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
};
} // end namespace llvm

View File

@ -248,10 +248,6 @@ static cl::opt<bool>
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
static cl::opt<bool> EnableCallGraphProfile(
"enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
@ -267,7 +263,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
Coroutines = false;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = EnableCallGraphProfile;
CallGraphProfile = true;
}
extern cl::opt<bool> EnableHotColdSplit;

View File

@ -195,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
CallGraphProfile = true;
}
PassManagerBuilder::~PassManagerBuilder() {
@ -834,6 +835,10 @@ void PassManagerBuilder::populateModulePassManager(
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
// Add Module flag "CG Profile" based on Branch Frequency Information.
if (CallGraphProfile)
MPM.add(createCGProfileLegacyPass());
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM

View File

@ -10,22 +10,47 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Instrumentation.h"
#include <array>
using namespace llvm;
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
/// Publishes the collected per-function-pair counts as the "CG Profile"
/// module flag of \p M.
///
/// Every entry of \p Counts is turned into one metadata node holding the two
/// functions of the key followed by the count as an i64 constant. All nodes
/// are then wrapped in a single MDNode and appended to the module flags.
///
/// \param M      Module that receives the flag.
/// \param Counts Map from a pair of functions to the accumulated count.
/// \returns false (and leaves \p M untouched) when \p Counts is empty,
///          true once the flag has been appended.
static bool
addModuleFlags(Module &M,
               MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
  if (Counts.empty())
    return false;

  LLVMContext &Context = M.getContext();
  MDBuilder MDB(Context);

  // Exactly one node is produced per map entry, so allocate the storage once.
  std::vector<Metadata *> Nodes;
  Nodes.reserve(Counts.size());

  // Iterate by const reference: the entries are only read, never modified,
  // so there is no reason to copy each key/value pair.
  for (const auto &E : Counts) {
    Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
                        ValueAsMetadata::get(E.first.second),
                        MDB.createConstant(ConstantInt::get(
                            Type::getInt64Ty(Context), E.second))};
    Nodes.push_back(MDNode::get(Context, Vals));
  }

  M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
  return true;
}
static bool runCGProfilePass(
Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
Function *CalledF, uint64_t NewCount) {
@ -35,14 +60,18 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
Count = SaturatingAdd(Count, NewCount);
};
// Ignore error here. Indirect calls are ignored if this fails.
(void)(bool)Symtab.create(M);
(void)(bool) Symtab.create(M);
for (auto &F : M) {
if (F.isDeclaration())
// Avoid extra cost of running passes for BFI when the function doesn't have
// entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check
// if using LazyBlockFrequencyInfoPass.
// TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
continue;
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto &BFI = GetBFI(F);
if (BFI.getEntryFreq() == 0)
continue;
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
TargetTransformInfo &TTI = GetTTI(F);
for (auto &BB : F) {
Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
if (!BBCount)
@ -69,28 +98,56 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
}
}
addModuleFlags(M, Counts);
return addModuleFlags(M, Counts);
}
namespace {

/// Legacy pass manager wrapper: a ModulePass that forwards to
/// runCGProfilePass, supplying per-function analyses on request.
struct CGProfileLegacyPass final : public ModulePass {
  static char ID;

  CGProfileLegacyPass() : ModulePass(ID) {
    initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  /// Declares that the CFG is preserved and that lazy block frequency info
  /// and target transform info are required.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<LazyBlockFrequencyInfoPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  /// Runs the shared implementation with LazyBFI enabled and reports whether
  /// it changed the module.
  bool runOnModule(Module &M) override {
    // Block frequency info is looked up per function, only when asked for.
    auto LookupBFI = [this](Function &Fn) -> BlockFrequencyInfo & {
      auto &LazyBFI = this->getAnalysis<LazyBlockFrequencyInfoPass>(Fn);
      return LazyBFI.getBFI();
    };
    // Target transform info comes from the module-wide wrapper pass.
    auto LookupTTI = [this](Function &Fn) -> TargetTransformInfo & {
      auto &TTIWrapper = this->getAnalysis<TargetTransformInfoWrapperPass>();
      return TTIWrapper.getTTI(Fn);
    };
    return runCGProfilePass(M, LookupBFI, LookupTTI, /*LazyBFI=*/true);
  }
};

} // namespace
// Out-of-class definition of the static ID member that the constructor hands
// to the ModulePass base class.
char CGProfileLegacyPass::ID = 0;
// Generates initializeCGProfileLegacyPassPass() and registers the pass under
// the argument name "cg-profile" with the display name "Call Graph Profile".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// PassSupport.h.
INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
false)
/// Factory for the legacy pass: allocates a fresh CGProfileLegacyPass with
/// `new` and returns it as a ModulePass pointer.
/// NOTE(review): ownership presumably passes to whichever pass manager the
/// result is added to (PassManagerBuilder feeds it to MPM.add()) -- confirm.
ModulePass *llvm::createCGProfileLegacyPass() {
return new CGProfileLegacyPass();
}
/// New pass manager entry point. Obtains the function analysis manager for
/// \p M, wires block frequency and target transform info lookups to it, and
/// delegates to runCGProfilePass with LazyBFI disabled. The helper's result
/// is not used: every analysis is always reported as preserved.
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
  auto &ModuleProxy = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M);
  FunctionAnalysisManager &FAM = ModuleProxy.getManager();

  // Per-function analysis lookups served by the function analysis manager.
  auto LookupBFI = [&](Function &Fn) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };
  auto LookupTTI = [&](Function &Fn) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(Fn);
  };

  runCGProfilePass(M, LookupBFI, LookupTTI, /*LazyBFI=*/false);

  return PreservedAnalyses::all();
}
/// Member-function form of the module-flag writer: converts every entry of
/// \p Counts into a metadata node and appends the collection to \p M as the
/// "CG Profile" module flag. Does nothing when \p Counts is empty.
void CGProfilePass::addModuleFlags(
Module &M,
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
// Nothing collected: leave the module without a "CG Profile" flag.
if (Counts.empty())
return;
LLVMContext &Context = M.getContext();
MDBuilder MDB(Context);
std::vector<Metadata *> Nodes;
for (auto E : Counts) {
// One node per entry: the two functions of the key, then the count as i64.
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
ValueAsMetadata::get(E.first.second),
MDB.createConstant(ConstantInt::get(
Type::getInt64Ty(Context), E.second))};
Nodes.push_back(MDNode::get(Context, Vals));
}
// Wrap all per-entry nodes in a single MDNode and append it to the flags.
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
}

View File

@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeCGProfileLegacyPassPass(Registry);
initializeInstrOrderFileLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
initializeMemorySanitizerLegacyPassPass(Registry);

View File

@ -276,6 +276,12 @@
; GCN-O1-NEXT: Warn about non-applied transformations
; GCN-O1-NEXT: Alignment from assumptions
; GCN-O1-NEXT: Strip Unused Function Prototypes
; GCN-O1-NEXT: Call Graph Profile
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Lazy Branch Probability Analysis
; GCN-O1-NEXT: Lazy Block Frequency Analysis
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
@ -623,6 +629,12 @@
; GCN-O2-NEXT: Strip Unused Function Prototypes
; GCN-O2-NEXT: Dead Global Elimination
; GCN-O2-NEXT: Merge Duplicate Global Constants
; GCN-O2-NEXT: Call Graph Profile
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Lazy Branch Probability Analysis
; GCN-O2-NEXT: Lazy Block Frequency Analysis
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
@ -975,6 +987,12 @@
; GCN-O3-NEXT: Strip Unused Function Prototypes
; GCN-O3-NEXT: Dead Global Elimination
; GCN-O3-NEXT: Merge Duplicate Global Constants
; GCN-O3-NEXT: Call Graph Profile
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Lazy Branch Probability Analysis
; GCN-O3-NEXT: Lazy Block Frequency Analysis
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -passes cg-profile -S | FileCheck %s
; RUN: opt < %s -cg-profile -S | FileCheck %s
declare void @b()

View File

@ -1,11 +0,0 @@
; RUN: opt -debug-pass-manager -passes='default<O2>' %s 2>&1 |FileCheck %s --check-prefixes=DEFAULT
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON
;
; DEFAULT: Running pass: CGProfilePass
; OFF-NOT: Running pass: CGProfilePass
; ON: Running pass: CGProfilePass
define void @foo() {
ret void
}

View File

@ -280,6 +280,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information

View File

@ -285,6 +285,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information

View File

@ -266,6 +266,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information