[inliner] Completely change (and fix) how the inline cost analysis

handles terminator instructions. The inline cost analysis inheritted some pretty rough handling of terminator insts from the original cost analysis, and then made it much, much worse by factoring all of the important analyses into a separate instruction visitor. That instruction visitor never visited the terminator. This works fine for things like conditional branches, but for many other things we simply computed The Wrong Value. First example are unconditional branches, which should be free but were counted as full cost. This is most significant for conditional branches where the condition simplifies and folds during inlining. We paid a 1 instruction tax on every branch in a straight line specialized path. =[ Oh, we also claimed that the unreachable instruction had cost. But it gets worse. Let's consider invoke. We never applied the call penalty. We never accounted for the cost of the arguments. Nope. Worse still, we didn't handle the *correctness* constraints of not inlining recursive invokes, or exception throwing returns_twice functions. Oops. See PR18206. Sadly, PR18206 requires yet another fix, but this refactoring is at least a huge step in that direction. llvm-svn: 197215
2024-11-25 20:23:11 +01:00 · 2013-12-13 07:59:56 +00:00 · 2013-12-13 07:59:56 +00:00 · af9509e185
commit af9509e185
parent d33cff338c
2 changed files with 121 additions and 37 deletions
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
+  bool HasReturn;
+  bool HasIndirectBr;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
+  bool visitReturnInst(ReturnInst &RI);
+  bool visitBranchInst(BranchInst &BI);
+  bool visitSwitchInst(SwitchInst &SI);
+  bool visitIndirectBrInst(IndirectBrInst &IBI);
+  bool visitResumeInst(ResumeInst &RI);
+  bool visitUnreachableInst(UnreachableInst &I);

 public:
  CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@ -139,12 +147,13 @@ public:
      : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
-        ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
-        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
-        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
-        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
-        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
-        SROACostSavings(0), SROACostSavingsLost(0) {}
+        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+        AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+        FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+        NumInstructionsSimplified(0), SROACostSavings(0),
+        SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
  return Base::visitCallSite(CS);
 }

+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+  // At least one return instruction will be free after inlining.
+  bool Free = !HasReturn;
+  HasReturn = true;
+  return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+  // We model unconditional branches as essentially free -- they really
+  // shouldn't exist at all, but handling them makes the behavior of the
+  // inliner more regular and predictable. Interestingly, conditional branches
+  // which will fold away are also free.
+  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+         dyn_cast_or_null<ConstantInt>(
+             SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+  // We model unconditional switches as free, see the comments on handling
+  // branches.
+  return isa<ConstantInt>(SI.getCondition()) ||
+         dyn_cast_or_null<ConstantInt>(
+             SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+  // We never want to inline functions that contain an indirectbr.  This is
+  // incorrect because all the blockaddress's (in static global initializers
+  // for example) would be referring to the original function, and this
+  // indirect jump would jump from the inlined copy of the function into the
+  // original function which is extremely undefined behavior.
+  // FIXME: This logic isn't really right; we can safely inline functions with
+  // indirectbr's as long as no other function or global references the
+  // blockaddress of a block within the current function.  And as a QOI issue,
+  // if someone is using a blockaddress without an indirectbr, and that
+  // reference somehow ends up in another function or global, we probably don't
+  // want to inline this function.
+  HasIndirectBr = true;
+  return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+  // FIXME: It's not clear that a single instruction is an accurate model for
+  // the inline cost of a resume instruction.
+  return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+  // FIXME: It might be reasonably to discount the cost of instructions leading
+  // to unreachable as they have the lowest possible impact on both runtime and
+  // code size.
+  return true; // No actual code is needed for unreachable.
+}
+
 bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
 /// construct has been detected. It returns false if inlining is no longer
 /// viable, and true if inlining remains viable.
 bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
-  for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
-       I != E; ++I) {
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;
@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
      Cost += InlineConstants::InstrCost;

    // If the visit this instruction detected an uninlinable pattern, abort.
-    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+        HasIndirectBr)
      return false;

    // If the caller is a recursive function then we don't want to inline
@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
    }
  }

-  // Track whether we've seen a return instruction. The first return
-  // instruction is free, as at least one will usually disappear in inlining.
-  bool HasReturn = false;
-
  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
    if (BB->empty())
      continue;

-    // Handle the terminator cost here where we can track returns and other
-    // function-wide constructs.
-    TerminatorInst *TI = BB->getTerminator();
-
-    // We never want to inline functions that contain an indirectbr.  This is
-    // incorrect because all the blockaddress's (in static global initializers
-    // for example) would be referring to the original function, and this
-    // indirect jump would jump from the inlined copy of the function into the 
-    // original function which is extremely undefined behavior.
-    // FIXME: This logic isn't really right; we can safely inline functions
-    // with indirectbr's as long as no other function or global references the
-    // blockaddress of a block within the current function.  And as a QOI issue,
-    // if someone is using a blockaddress without an indirectbr, and that
-    // reference somehow ends up in another function or global, we probably
-    // don't want to inline this function.
-    if (isa<IndirectBrInst>(TI))
-      return false;
-
-    if (!HasReturn && isa<ReturnInst>(TI))
-      HasReturn = true;
-    else
-      Cost += InlineConstants::InstrCost;
-
    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    if (!analyzeBlock(BB)) {
-      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+          HasIndirectBr)
        return false;

      // If the caller is a recursive function then we don't want to inline
@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
      break;
    }

+    TerminatorInst *TI = BB->getTerminator();
+
    // Add in the live successors by first checking whether we have terminator
    // that may be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
--- a/test/Transforms/Inline/invoke-cost.ll
+++ b/test/Transforms/Inline/invoke-cost.ll
@ -0,0 +1,45 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+@glbl = external global i32
+
+declare void @f()
+declare i32 @__gxx_personality_v0(...)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare void @_ZSt9terminatev()
+
+define void @inner1() {
+entry:
+  invoke void @f() to label %cont1 unwind label %terminate.lpad
+
+cont1:
+  invoke void @f() to label %cont2 unwind label %terminate.lpad
+
+cont2:
+  invoke void @f() to label %cont3 unwind label %terminate.lpad
+
+cont3:
+  invoke void @f() to label %cont4 unwind label %terminate.lpad
+
+cont4:
+  ret void
+
+terminate.lpad:
+  landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+define void @outer1() {
+; CHECK-LABEL: @outer1(
+;
+; This call should not get inlined because inner1 actually calls a function
+; many times, but it only does so through invoke as opposed to call.
+;
+; CHECK: call void @inner1
+  call void @inner1()
+  ret void
+}