Increasing the inline limit from (overly conservative) 200 to 300. Given each BB costs 20 and each instruction costs 5, 200 means a 4 BB function + 24 instructions (actually less because caller's size also contributes to it).

Furthermore, double the limit when more than 10% of the callee instructions are vector instructions. Multimedia kernels tend to love inlining. llvm-svn: 48725
2025-01-31 20:51:52 +01:00 · 2008-03-24 06:37:48 +00:00 · 2008-03-24 06:37:48 +00:00 · d01a2a18f8
commit d01a2a18f8
parent 00ecee8d58
5 changed files with 54 additions and 11 deletions
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass {
  ///
  virtual int getInlineCost(CallSite CS) = 0;

+  // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+  // higher threshold to determine if the function call should be inlined.
+  ///
+  virtual float getInlineFudgeFactor(CallSite CS) = 0;
+
 private:
  // InlineThreshold - Cache the value here for easy access.
  unsigned InlineThreshold;
--- a/include/llvm/Transforms/Utils/InlineCost.h
+++ b/include/llvm/Transforms/Utils/InlineCost.h
@ -41,6 +41,10 @@ namespace llvm {
      // NumInsts, NumBlocks - Keep track of how large each function is, which is
      // used to estimate the code size cost of inlining it.
      unsigned NumInsts, NumBlocks;
+
+      // NumVectorInsts - Keep track of how many instructions produce vector
+      // values. The inliner is more aggressive when inlining vector kernels.
+      unsigned NumVectorInsts;
      
      // ArgumentWeights - Each formal argument of the function is inspected to
      // see if it is used in any contexts where making it a constant or alloca
@ -48,7 +52,7 @@ namespace llvm {
      // entry here.
      std::vector<ArgInfo> ArgumentWeights;
      
-      FunctionInfo() : NumInsts(0), NumBlocks(0) {}
+      FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {}
      
      /// analyzeFunction - Fill in the current structure with information gleaned
      /// from the specified function.
@ -73,7 +77,12 @@ namespace llvm {
    // getInlineCost - The heuristic used to determine if we should inline the
    // function call or not.
    //
-    int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline);
+    int getInlineCost(CallSite CS,
+                      SmallPtrSet<const Function *, 16> &NeverInline);
+
+    // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+    // higher threshold to determine if the function call should be inlined.
+    float getInlineFudgeFactor(CallSite CS);
  };
 }

--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@ -40,6 +40,9 @@ namespace {
    int getInlineCost(CallSite CS) {
      return CA.getInlineCost(CS, NeverInline);
    }
+    float getInlineFudgeFactor(CallSite CS) {
+      return CA.getInlineFudgeFactor(CS);
+    }
    virtual bool doInitialization(CallGraph &CG);
  };
  char SimpleInliner::ID = 0;
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");

 namespace {
-  cl::opt<int>             // FIXME: 200 is VERY conservative
-  InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
-        cl::desc("Control the amount of inlining to perform (default = 200)"));
+  cl::opt<int>             // Raised from the overly conservative 200.
+  InlineLimit("inline-threshold", cl::Hidden, cl::init(300),
+        cl::desc("Control the amount of inlining to perform (default = 300)"));
 }

 Inliner::Inliner(const void *ID) 
@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
        // try to do so.
        CallSite CS = CallSites[CSi];
        int InlineCost = getInlineCost(CS);
-        if (InlineCost >= (int)InlineThreshold) {
+        float FudgeFactor = getInlineFudgeFactor(CS);
+
+        if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) {
          DOUT << "    NOT Inlining: cost=" << InlineCost
               << ", Call: " << *CS.getInstruction();
        } else {
--- a/lib/Transforms/Utils/InlineCost.cpp
+++ b/lib/Transforms/Utils/InlineCost.cpp
@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
-  unsigned NumInsts = 0, NumBlocks = 0;
+  unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;

  // Look at the size of the callee.  Each basic block counts as 20 units, and
  // each instruction counts as 5.
@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
    for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
         II != E; ++II) {
      if (isa<DbgInfoIntrinsic>(II)) continue;  // Debug intrinsics don't count.
+      if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+
+      if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) ||
+          isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType()))
+        ++NumVectorInsts; 
      
      // Noop casts, including ptr <-> int,  don't count.
      if (const CastInst *CI = dyn_cast<CastInst>(II)) {
@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
            isa<PtrToIntInst>(CI))
          continue;
      } else if (const GetElementPtrInst *GEPI =
-                         dyn_cast<GetElementPtrInst>(II)) {
+                 dyn_cast<GetElementPtrInst>(II)) {
        // If a GEP has all constant indices, it will probably be folded with
        // a load/store.
        bool AllConstant = true;
@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
    ++NumBlocks;
  }

-  this->NumBlocks = NumBlocks;
-  this->NumInsts  = NumInsts;
+  this->NumBlocks      = NumBlocks;
+  this->NumInsts       = NumInsts;
+  this->NumVectorInsts = NumVectorInsts;

  // Check out all of the arguments to the function, figuring out how much
  // code can be eliminated if one of the arguments is a constant.
@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS,
  //
  InlineCost += Caller->size()/20;
  
-  
  // Look at the size of the callee.  Each basic block counts as 20 units, and
  // each instruction counts as 5.
  InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
+
  return InlineCost;
 }

+// getInlineFudgeFactor - Return the multiplier the inliner applies to its
+// threshold for this call site: 2.0 for vector-heavy callees, otherwise 1.0.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+  
+  // Get information about the callee...
+  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+  
+  // If this callee has not been analyzed yet (no blocks counted), do so now.
+  if (CalleeFI.NumBlocks == 0)
+    CalleeFI.analyzeFunction(Callee);
+
+  // Double the threshold when vector instructions make up more than 10% of the
+  // callee's instructions (NumInsts/10 is an integer division and truncates).
+  if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
+    return 2.0f;
+  return 1.0f;
+}