mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[Profile] Implement select instruction instrumentation in IR PGO
Differential Revision: http://reviews.llvm.org/D23727 llvm-svn: 281858
This commit is contained in:
parent
fdf2d14b30
commit
3da3a5fee4
@ -9704,6 +9704,37 @@ structures and the code to increment the appropriate value, in a
|
||||
format that can be written out by a compiler runtime and consumed via
|
||||
the ``llvm-profdata`` tool.
|
||||
|
||||
'``llvm.instrprof_increment_step``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare void @llvm.instrprof_increment_step(i8* <name>, i64 <hash>,
|
||||
i32 <num-counters>,
|
||||
i32 <index>, i64 <step>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.instrprof_increment_step``' intrinsic is an extension to
|
||||
the '``llvm.instrprof_increment``' intrinsic with an additional fifth
|
||||
argument to specify the step of the increment.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
The first four arguments are the same as '``llvm.instrprof_increment``'
|
||||
instrinsic.
|
||||
|
||||
The last argument specifies the value of the increment of the counter variable.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
See description of '``llvm.instrprof_increment``' instrinsic.
|
||||
|
||||
|
||||
'``llvm.instrprof_value_profile``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -361,6 +361,17 @@ namespace llvm {
|
||||
}
|
||||
};
|
||||
|
||||
class InstrProfIncrementInstStep : public InstrProfIncrementInst {
|
||||
public:
|
||||
static inline bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
|
||||
}
|
||||
static inline bool classof(const Value *V) {
|
||||
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
}
|
||||
Value *getStep() const { return const_cast<Value *>(getArgOperand(4)); }
|
||||
};
|
||||
|
||||
/// This represents the llvm.instrprof_value_profile intrinsic.
|
||||
class InstrProfValueProfileInst : public IntrinsicInst {
|
||||
public:
|
||||
|
@ -346,6 +346,12 @@ def int_instrprof_increment : Intrinsic<[],
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[]>;
|
||||
|
||||
// A counter increment with step for instrumentation based profiling.
|
||||
def int_instrprof_increment_step : Intrinsic<[],
|
||||
[llvm_ptr_ty, llvm_i64_ty,
|
||||
llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
|
||||
[]>;
|
||||
|
||||
// A call to profile runtime for value profiling of target expressions
|
||||
// through instrumentation based profiling.
|
||||
def int_instrprof_value_profile : Intrinsic<[],
|
||||
|
@ -107,6 +107,13 @@ StringRef InstrProfiling::getCoverageSection() const {
|
||||
return getInstrProfCoverageSectionName(isMachO());
|
||||
}
|
||||
|
||||
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
|
||||
InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
|
||||
if (Inc)
|
||||
return Inc;
|
||||
return dyn_cast<InstrProfIncrementInst>(Instr);
|
||||
}
|
||||
|
||||
bool InstrProfiling::run(Module &M) {
|
||||
bool MadeChange = false;
|
||||
|
||||
@ -138,7 +145,8 @@ bool InstrProfiling::run(Module &M) {
|
||||
for (BasicBlock &BB : F)
|
||||
for (auto I = BB.begin(), E = BB.end(); I != E;) {
|
||||
auto Instr = I++;
|
||||
if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
|
||||
InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
|
||||
if (Inc) {
|
||||
lowerIncrement(Inc);
|
||||
MadeChange = true;
|
||||
} else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
|
||||
@ -214,6 +222,14 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
|
||||
Ind->eraseFromParent();
|
||||
}
|
||||
|
||||
static Value *getIncrementStep(InstrProfIncrementInst *Inc,
|
||||
IRBuilder<> &Builder) {
|
||||
auto *IncWithStep = dyn_cast<InstrProfIncrementInstStep>(Inc);
|
||||
if (IncWithStep)
|
||||
return IncWithStep->getStep();
|
||||
return Builder.getInt64(1);
|
||||
}
|
||||
|
||||
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
|
||||
GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
|
||||
|
||||
@ -221,7 +237,7 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
|
||||
uint64_t Index = Inc->getIndex()->getZExtValue();
|
||||
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
|
||||
Value *Count = Builder.CreateLoad(Addr, "pgocount");
|
||||
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
|
||||
Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder));
|
||||
Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
|
||||
Inc->eraseFromParent();
|
||||
}
|
||||
|
@ -86,6 +86,7 @@ using namespace llvm;
|
||||
#define DEBUG_TYPE "pgo-instrumentation"
|
||||
|
||||
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
|
||||
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
|
||||
STATISTIC(NumOfPGOEdge, "Number of edges.");
|
||||
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
|
||||
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
|
||||
@ -133,7 +134,65 @@ static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function",
|
||||
static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
|
||||
cl::Hidden);
|
||||
|
||||
// Command line option to enable/disable select instruction instrumentation.
|
||||
static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
|
||||
cl::Hidden);
|
||||
namespace {
|
||||
|
||||
/// The select instruction visitor plays three roles specified
|
||||
/// by the mode. In \c VM_counting mode, it simply counts the number of
|
||||
/// select instructions. In \c VM_instrument mode, it inserts code to count
|
||||
/// the number times TrueValue of select is taken. In \c VM_annotate mode,
|
||||
/// it reads the profile data and annotate the select instruction with metadata.
|
||||
enum VisitMode { VM_counting, VM_instrument, VM_annotate };
|
||||
class PGOUseFunc;
|
||||
|
||||
/// Instruction Visitor class to visit select instructions.
|
||||
struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
|
||||
Function &F;
|
||||
unsigned NSIs = 0; // Number of select instructions instrumented.
|
||||
VisitMode Mode = VM_counting; // Visiting mode.
|
||||
unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
|
||||
unsigned TotalNumCtrs = 0; // Total number of counters
|
||||
GlobalVariable *FuncNameVar = nullptr;
|
||||
uint64_t FuncHash = 0;
|
||||
PGOUseFunc *UseFunc = nullptr;
|
||||
|
||||
SelectInstVisitor(Function &Func) : F(Func) {}
|
||||
|
||||
void countSelects(Function &Func) {
|
||||
Mode = VM_counting;
|
||||
visit(Func);
|
||||
}
|
||||
// Visit the IR stream and instrument all select instructions. \p
|
||||
// Ind is a pointer to the counter index variable; \p TotalNC
|
||||
// is the total number of counters; \p FNV is the pointer to the
|
||||
// PGO function name var; \p FHash is the function hash.
|
||||
void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
|
||||
GlobalVariable *FNV, uint64_t FHash) {
|
||||
Mode = VM_instrument;
|
||||
CurCtrIdx = Ind;
|
||||
TotalNumCtrs = TotalNC;
|
||||
FuncHash = FHash;
|
||||
FuncNameVar = FNV;
|
||||
visit(Func);
|
||||
}
|
||||
|
||||
// Visit the IR stream and annotate all select instructions.
|
||||
void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
|
||||
Mode = VM_annotate;
|
||||
UseFunc = UF;
|
||||
CurCtrIdx = Ind;
|
||||
visit(Func);
|
||||
}
|
||||
|
||||
void instrumentOneSelectInst(SelectInst &SI);
|
||||
void annotateOneSelectInst(SelectInst &SI);
|
||||
// Visit \p SI instruction and perform tasks according to visit mode.
|
||||
void visitSelectInst(SelectInst &SI);
|
||||
unsigned getNumOfSelectInsts() const { return NSIs; }
|
||||
};
|
||||
|
||||
class PGOInstrumentationGenLegacyPass : public ModulePass {
|
||||
public:
|
||||
static char ID;
|
||||
@ -180,6 +239,7 @@ private:
|
||||
AU.addRequired<BlockFrequencyInfoWrapperPass>();
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char PGOInstrumentationGenLegacyPass::ID = 0;
|
||||
@ -254,6 +314,7 @@ private:
|
||||
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
|
||||
|
||||
public:
|
||||
SelectInstVisitor SIVisitor;
|
||||
std::string FuncName;
|
||||
GlobalVariable *FuncNameVar;
|
||||
// CFG hash value for this function.
|
||||
@ -280,8 +341,13 @@ public:
|
||||
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
|
||||
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
|
||||
BlockFrequencyInfo *BFI = nullptr)
|
||||
: F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
|
||||
: F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
|
||||
MST(F, BPI, BFI) {
|
||||
|
||||
// This should be done before CFG hash computation.
|
||||
SIVisitor.countSelects(Func);
|
||||
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
|
||||
|
||||
FuncName = getPGOFuncName(F);
|
||||
computeCFGHash();
|
||||
if (ComdatMembers.size())
|
||||
@ -308,7 +374,7 @@ public:
|
||||
if (!E->InMST && !E->Removed)
|
||||
NumCounters++;
|
||||
}
|
||||
return NumCounters;
|
||||
return NumCounters + SIVisitor.getNumOfSelectInsts();
|
||||
}
|
||||
};
|
||||
|
||||
@ -328,7 +394,8 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
|
||||
}
|
||||
}
|
||||
JC.update(Indexes);
|
||||
FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
|
||||
FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
|
||||
(uint64_t)findIndirectCallSites(F).size() << 48 |
|
||||
(uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
|
||||
}
|
||||
|
||||
@ -473,6 +540,10 @@ static void instrumentOneFunc(
|
||||
Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
|
||||
Builder.getInt32(I++)});
|
||||
}
|
||||
|
||||
// Now instrument select instructions:
|
||||
FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
|
||||
FuncInfo.FunctionHash);
|
||||
assert(I == NumCounters);
|
||||
|
||||
if (DisableValueProfiling)
|
||||
@ -594,17 +665,17 @@ public:
|
||||
// Return the profile record for this function;
|
||||
InstrProfRecord &getProfileRecord() { return ProfileRecord; }
|
||||
|
||||
// Return the auxiliary BB information.
|
||||
UseBBInfo &getBBInfo(const BasicBlock *BB) const {
|
||||
return FuncInfo.getBBInfo(BB);
|
||||
}
|
||||
|
||||
private:
|
||||
Function &F;
|
||||
Module *M;
|
||||
// This member stores the shared information with class PGOGenFunc.
|
||||
FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
|
||||
|
||||
// Return the auxiliary BB information.
|
||||
UseBBInfo &getBBInfo(const BasicBlock *BB) const {
|
||||
return FuncInfo.getBBInfo(BB);
|
||||
}
|
||||
|
||||
// The maximum count value in the profile. This is only used in PGO use
|
||||
// compilation.
|
||||
uint64_t ProgramMaxCount;
|
||||
@ -677,6 +748,8 @@ void PGOUseFunc::setInstrumentedCounts(
|
||||
NewEdge1.InMST = true;
|
||||
getBBInfo(InstrBB).setBBInfoCount(CountValue);
|
||||
}
|
||||
// Now annotate select instructions
|
||||
FuncInfo.SIVisitor.annotateSelects(F, this, &I);
|
||||
assert(I == CountFromProfile.size());
|
||||
}
|
||||
|
||||
@ -820,7 +893,7 @@ void PGOUseFunc::populateCounters() {
|
||||
DEBUG(FuncInfo.dumpInfo("after reading profile."));
|
||||
}
|
||||
|
||||
static void setProfMetadata(Module *M, TerminatorInst *TI,
|
||||
static void setProfMetadata(Module *M, Instruction *TI,
|
||||
ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
|
||||
MDBuilder MDB(M->getContext());
|
||||
assert(MaxCount > 0 && "Bad max count");
|
||||
@ -869,6 +942,57 @@ void PGOUseFunc::setBranchWeights() {
|
||||
}
|
||||
}
|
||||
|
||||
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
|
||||
Module *M = F.getParent();
|
||||
IRBuilder<> Builder(&SI);
|
||||
Type *Int64Ty = Builder.getInt64Ty();
|
||||
Type *I8PtrTy = Builder.getInt8PtrTy();
|
||||
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
|
||||
Builder.CreateCall(
|
||||
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
|
||||
{llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
|
||||
Builder.getInt64(FuncHash),
|
||||
Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
|
||||
++(*CurCtrIdx);
|
||||
}
|
||||
|
||||
void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
|
||||
std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
|
||||
assert(*CurCtrIdx < CountFromProfile.size() &&
|
||||
"Out of bound access of counters");
|
||||
uint64_t SCounts[2];
|
||||
SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
|
||||
++(*CurCtrIdx);
|
||||
uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue;
|
||||
// False Count
|
||||
SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
|
||||
uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
|
||||
setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
|
||||
}
|
||||
|
||||
void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
|
||||
if (!PGOInstrSelect)
|
||||
return;
|
||||
// FIXME: do not handle this yet.
|
||||
if (SI.getCondition()->getType()->isVectorTy())
|
||||
return;
|
||||
|
||||
NSIs++;
|
||||
switch (Mode) {
|
||||
case VM_counting:
|
||||
return;
|
||||
case VM_instrument:
|
||||
instrumentOneSelectInst(SI);
|
||||
break;
|
||||
case VM_annotate:
|
||||
annotateOneSelectInst(SI);
|
||||
break;
|
||||
default:
|
||||
assert(false && "Unknown visiting mode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse all the indirect callsites and annotate the instructions.
|
||||
void PGOUseFunc::annotateIndirectCallSites() {
|
||||
if (DisableValueProfiling)
|
||||
|
8
test/Transforms/PGOProfile/Inputs/select1.proftext
Normal file
8
test/Transforms/PGOProfile/Inputs/select1.proftext
Normal file
@ -0,0 +1,8 @@
|
||||
:ir
|
||||
test_br_2
|
||||
72057623705475732
|
||||
3
|
||||
4
|
||||
1
|
||||
1
|
||||
|
34
test/Transforms/PGOProfile/select1.ll
Normal file
34
test/Transforms/PGOProfile/select1.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
|
||||
; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
|
||||
; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
|
||||
; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
|
||||
; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata
|
||||
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
|
||||
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @test_br_2(i32 %i) {
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %i, 0
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
|
||||
if.then:
|
||||
%add = add nsw i32 %i, 2
|
||||
;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64
|
||||
;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]])
|
||||
;NOSELECT-NOT: call void @llvm.instrprof.increment.step
|
||||
%s = select i1 %cmp, i32 %add, i32 0
|
||||
;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]]
|
||||
;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3}
|
||||
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
%sub = sub nsw i32 %i, 2
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
|
||||
ret i32 %retv
|
||||
}
|
Loading…
Reference in New Issue
Block a user