mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
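Increase the inline limit from the (overly conservative) 200 to 300 [note: the diff shown below sets the new default to 400]. Given that each BB costs 20 and each instruction costs 5, a limit of 200 corresponds to a 4-BB function plus 24 instructions (4*20 + 24*5 = 200); in practice it allows even less, because the caller's size also contributes to the cost.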
Furthermore, double the limit [note: the diff shown below applies a factor of 1.5] when more than 10% of the callee's instructions are vector instructions. Multimedia kernels tend to love inlining. llvm-svn: 48725
This commit is contained in:
parent
00ecee8d58
commit
d01a2a18f8
@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass {
|
|||||||
///
|
///
|
||||||
virtual int getInlineCost(CallSite CS) = 0;
|
virtual int getInlineCost(CallSite CS) = 0;
|
||||||
|
|
||||||
|
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||||
|
// higher threshold to determine if the function call should be inlined.
|
||||||
|
///
|
||||||
|
virtual float getInlineFudgeFactor(CallSite CS) = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// InlineThreshold - Cache the value here for easy access.
|
// InlineThreshold - Cache the value here for easy access.
|
||||||
unsigned InlineThreshold;
|
unsigned InlineThreshold;
|
||||||
|
@ -42,13 +42,17 @@ namespace llvm {
|
|||||||
// used to estimate the code size cost of inlining it.
|
// used to estimate the code size cost of inlining it.
|
||||||
unsigned NumInsts, NumBlocks;
|
unsigned NumInsts, NumBlocks;
|
||||||
|
|
||||||
|
// NumVectorInsts - Keep track of how many instructions produce vector values.
|
||||||
|
// The inliner is being more aggressive with inlining vector kernels.
|
||||||
|
unsigned NumVectorInsts;
|
||||||
|
|
||||||
// ArgumentWeights - Each formal argument of the function is inspected to
|
// ArgumentWeights - Each formal argument of the function is inspected to
|
||||||
// see if it is used in any contexts where making it a constant or alloca
|
// see if it is used in any contexts where making it a constant or alloca
|
||||||
// would reduce the code size. If so, we add some value to the argument
|
// would reduce the code size. If so, we add some value to the argument
|
||||||
// entry here.
|
// entry here.
|
||||||
std::vector<ArgInfo> ArgumentWeights;
|
std::vector<ArgInfo> ArgumentWeights;
|
||||||
|
|
||||||
FunctionInfo() : NumInsts(0), NumBlocks(0) {}
|
FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {}
|
||||||
|
|
||||||
/// analyzeFunction - Fill in the current structure with information gleaned
|
/// analyzeFunction - Fill in the current structure with information gleaned
|
||||||
/// from the specified function.
|
/// from the specified function.
|
||||||
@ -73,7 +77,12 @@ namespace llvm {
|
|||||||
// getInlineCost - The heuristic used to determine if we should inline the
|
// getInlineCost - The heuristic used to determine if we should inline the
|
||||||
// function call or not.
|
// function call or not.
|
||||||
//
|
//
|
||||||
int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline);
|
int getInlineCost(CallSite CS,
|
||||||
|
SmallPtrSet<const Function *, 16> &NeverInline);
|
||||||
|
|
||||||
|
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||||
|
// higher threshold to determine if the function call should be inlined.
|
||||||
|
float getInlineFudgeFactor(CallSite CS);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,6 +40,9 @@ namespace {
|
|||||||
int getInlineCost(CallSite CS) {
|
int getInlineCost(CallSite CS) {
|
||||||
return CA.getInlineCost(CS, NeverInline);
|
return CA.getInlineCost(CS, NeverInline);
|
||||||
}
|
}
|
||||||
|
float getInlineFudgeFactor(CallSite CS) {
|
||||||
|
return CA.getInlineFudgeFactor(CS);
|
||||||
|
}
|
||||||
virtual bool doInitialization(CallGraph &CG);
|
virtual bool doInitialization(CallGraph &CG);
|
||||||
};
|
};
|
||||||
char SimpleInliner::ID = 0;
|
char SimpleInliner::ID = 0;
|
||||||
|
@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined");
|
|||||||
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
|
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
cl::opt<int> // FIXME: 200 is VERY conservative
|
cl::opt<int>
|
||||||
InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
|
InlineLimit("inline-threshold", cl::Hidden, cl::init(400),
|
||||||
cl::desc("Control the amount of inlining to perform (default = 200)"));
|
cl::desc("Control the amount of inlining to perform (default = 400)"));
|
||||||
}
|
}
|
||||||
|
|
||||||
Inliner::Inliner(const void *ID)
|
Inliner::Inliner(const void *ID)
|
||||||
@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
|
|||||||
// try to do so.
|
// try to do so.
|
||||||
CallSite CS = CallSites[CSi];
|
CallSite CS = CallSites[CSi];
|
||||||
int InlineCost = getInlineCost(CS);
|
int InlineCost = getInlineCost(CS);
|
||||||
if (InlineCost >= (int)InlineThreshold) {
|
float FudgeFactor = getInlineFudgeFactor(CS);
|
||||||
|
|
||||||
|
if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) {
|
||||||
DOUT << " NOT Inlining: cost=" << InlineCost
|
DOUT << " NOT Inlining: cost=" << InlineCost
|
||||||
<< ", Call: " << *CS.getInstruction();
|
<< ", Call: " << *CS.getInstruction();
|
||||||
} else {
|
} else {
|
||||||
|
@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
|
|||||||
/// analyzeFunction - Fill in the current structure with information gleaned
|
/// analyzeFunction - Fill in the current structure with information gleaned
|
||||||
/// from the specified function.
|
/// from the specified function.
|
||||||
void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
||||||
unsigned NumInsts = 0, NumBlocks = 0;
|
unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
|
||||||
|
|
||||||
// Look at the size of the callee. Each basic block counts as 20 units, and
|
// Look at the size of the callee. Each basic block counts as 20 units, and
|
||||||
// each instruction counts as 5.
|
// each instruction counts as 5.
|
||||||
@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
|||||||
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
|
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
|
||||||
II != E; ++II) {
|
II != E; ++II) {
|
||||||
if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count.
|
if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count.
|
||||||
|
if (isa<PHINode>(II)) continue; // PHI nodes don't count.
|
||||||
|
|
||||||
|
if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) ||
|
||||||
|
isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType()))
|
||||||
|
++NumVectorInsts;
|
||||||
|
|
||||||
// Noop casts, including ptr <-> int, don't count.
|
// Noop casts, including ptr <-> int, don't count.
|
||||||
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
|
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
|
||||||
@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
|||||||
isa<PtrToIntInst>(CI))
|
isa<PtrToIntInst>(CI))
|
||||||
continue;
|
continue;
|
||||||
} else if (const GetElementPtrInst *GEPI =
|
} else if (const GetElementPtrInst *GEPI =
|
||||||
dyn_cast<GetElementPtrInst>(II)) {
|
dyn_cast<GetElementPtrInst>(II)) {
|
||||||
// If a GEP has all constant indices, it will probably be folded with
|
// If a GEP has all constant indices, it will probably be folded with
|
||||||
// a load/store.
|
// a load/store.
|
||||||
bool AllConstant = true;
|
bool AllConstant = true;
|
||||||
@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
|||||||
++NumBlocks;
|
++NumBlocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
this->NumBlocks = NumBlocks;
|
this->NumBlocks = NumBlocks;
|
||||||
this->NumInsts = NumInsts;
|
this->NumInsts = NumInsts;
|
||||||
|
this->NumVectorInsts = NumVectorInsts;
|
||||||
|
|
||||||
// Check out all of the arguments to the function, figuring out how much
|
// Check out all of the arguments to the function, figuring out how much
|
||||||
// code can be eliminated if one of the arguments is a constant.
|
// code can be eliminated if one of the arguments is a constant.
|
||||||
@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS,
|
|||||||
//
|
//
|
||||||
InlineCost += Caller->size()/20;
|
InlineCost += Caller->size()/20;
|
||||||
|
|
||||||
|
|
||||||
// Look at the size of the callee. Each basic block counts as 20 units, and
|
// Look at the size of the callee. Each basic block counts as 20 units, and
|
||||||
// each instruction counts as 5.
|
// each instruction counts as 5.
|
||||||
InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
|
InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
|
||||||
|
|
||||||
return InlineCost;
|
return InlineCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||||
|
// higher threshold to determine if the function call should be inlined.
|
||||||
|
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
|
||||||
|
Function *Callee = CS.getCalledFunction();
|
||||||
|
|
||||||
|
// Get information about the callee...
|
||||||
|
FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
|
||||||
|
|
||||||
|
// If we haven't calculated this information yet, do so now.
|
||||||
|
if (CalleeFI.NumBlocks == 0)
|
||||||
|
CalleeFI.analyzeFunction(Callee);
|
||||||
|
|
||||||
|
// Be more aggressive if the function contains a good chunk (if it makes up
|
||||||
|
// more than 10% of the instructions) of vector instructions.
|
||||||
|
if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
|
||||||
|
return 1.5f;
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user