Increasing the inline limit from (overly conservative) 200 to 300. Given each BB costs 20 and each instruction costs 5, 200 means a 4 BB function + 24 instructions (actually less because caller's size also contributes to it).

Furthermore, double the limit when more than 10% of the callee instructions are vector instructions. Multimedia kernels tend to love inlining. llvm-svn: 48725
2025-01-31 20:51:52 +01:00 · 2008-03-24 06:37:48 +00:00 · 2008-03-24 06:37:48 +00:00 · d01a2a18f8
commit d01a2a18f8
parent 00ecee8d58
5 changed files with 54 additions and 11 deletions
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass {
  ///
  virtual int getInlineCost(CallSite CS) = 0;
  // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
  // higher threshold to determine if the function call should be inlined.
  ///
  virtual float getInlineFudgeFactor(CallSite CS) = 0;
 private:
  // InlineThreshold - Cache the value here for easy access.
  unsigned InlineThreshold;
--- a/include/llvm/Transforms/Utils/InlineCost.h
+++ b/include/llvm/Transforms/Utils/InlineCost.h
@ -42,13 +42,17 @@ namespace llvm {
      // used to estimate the code size cost of inlining it.
      unsigned NumInsts, NumBlocks;
      // NumVectorInsts - Keep track of how many instructions produce vector
      // values.  The inliner is more aggressive with inlining vector kernels.
      unsigned NumVectorInsts;
      // ArgumentWeights - Each formal argument of the function is inspected to
      // see if it is used in any contexts where making it a constant or alloca
      // would reduce the code size.  If so, we add some value to the argument
      // entry here.
      std::vector<ArgInfo> ArgumentWeights;
-      FunctionInfo() : NumInsts(0), NumBlocks(0) {}
+      FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {}
      /// analyzeFunction - Fill in the current structure with information gleaned
      /// from the specified function.
@ -73,7 +77,12 @@ namespace llvm {
    // getInlineCost - The heuristic used to determine if we should inline the
    // function call or not.
    //
-    int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline);
+    int getInlineCost(CallSite CS,
                      SmallPtrSet<const Function *, 16> &NeverInline);
    // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
    // higher threshold to determine if the function call should be inlined.
    float getInlineFudgeFactor(CallSite CS);
  };
 }
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@ -40,6 +40,9 @@ namespace {
    // getInlineCost - Forward the cost query for this call site to the cost
    // analyzer CA, handing it the NeverInline set.
    int getInlineCost(CallSite CS) {
      return CA.getInlineCost(CS, NeverInline);
    }
    // getInlineFudgeFactor - Forward the threshold-multiplier query for this
    // call site to the cost analyzer CA.
    float getInlineFudgeFactor(CallSite CS) {
      return CA.getInlineFudgeFactor(CS);
    }
    virtual bool doInitialization(CallGraph &CG);
  };
  char SimpleInliner::ID = 0;
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
 namespace {
-  cl::opt<int>             // FIXME: 200 is VERY conservative
+  cl::opt<int>
-  InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+  InlineLimit("inline-threshold", cl::Hidden, cl::init(400),
-        cl::desc("Control the amount of inlining to perform (default = 200)"));
+        cl::desc("Control the amount of inlining to perform (default = 400)"));
 }
 Inliner::Inliner(const void *ID) 
@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
        // try to do so.
        CallSite CS = CallSites[CSi];
        int InlineCost = getInlineCost(CS);
-        if (InlineCost >= (int)InlineThreshold) {
+        float FudgeFactor = getInlineFudgeFactor(CS);
        if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) {
          DOUT << "    NOT Inlining: cost=" << InlineCost
               << ", Call: " << *CS.getInstruction();
        } else {
--- a/lib/Transforms/Utils/InlineCost.cpp
+++ b/lib/Transforms/Utils/InlineCost.cpp
@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
-  unsigned NumInsts = 0, NumBlocks = 0;
+  unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
  // Look at the size of the callee.  Each basic block counts as 20 units, and
  // each instruction counts as 5.
@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
    for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
         II != E; ++II) {
      if (isa<DbgInfoIntrinsic>(II)) continue;  // Debug intrinsics don't count.
      if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
      if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) ||
          isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType()))
        ++NumVectorInsts; 
      // Noop casts, including ptr <-> int,  don't count.
      if (const CastInst *CI = dyn_cast<CastInst>(II)) {
@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
            isa<PtrToIntInst>(CI))
          continue;
      } else if (const GetElementPtrInst *GEPI =
-                         dyn_cast<GetElementPtrInst>(II)) {
+                 dyn_cast<GetElementPtrInst>(II)) {
        // If a GEP has all constant indices, it will probably be folded with
        // a load/store.
        bool AllConstant = true;
@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
    ++NumBlocks;
  }
-  this->NumBlocks = NumBlocks;
+  this->NumBlocks      = NumBlocks;
-  this->NumInsts  = NumInsts;
+  this->NumInsts       = NumInsts;
  this->NumVectorInsts = NumVectorInsts;
  // Check out all of the arguments to the function, figuring out how much
  // code can be eliminated if one of the arguments is a constant.
@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS,
  //
  InlineCost += Caller->size()/20;
  // Look at the size of the callee.  Each basic block counts as 20 units, and
  // each instruction counts as 5.
  InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
  return InlineCost;
 }
 // getInlineFudgeFactor - Compute the multiplier applied to the inline
 // threshold for this call site.  Returns 1.5 when the callee is vector-heavy
 // and 1.0 otherwise.
 float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
  Function *F = CS.getCalledFunction();
  // Fetch the cached summary for the callee.
  FunctionInfo &Info = CachedFunctionInfo[F];
  // A zero block count means the summary has not been computed yet.
  if (Info.NumBlocks == 0)
    Info.analyzeFunction(F);
  // The callee counts as vector-heavy when its vector instructions make up
  // more than 10% of its counted instructions (the division is integral).
  const bool VectorHeavy = Info.NumVectorInsts > Info.NumInsts/10;
  return VectorHeavy ? 1.5f : 1.0f;
 }