
Do not inline functions with (dynamic) alloca into
functions that don't already have a (dynamic) alloca.
Dynamic allocas cause inefficient codegen and we shouldn't
propagate this (behavior follows gcc). Two existing tests
assumed such inlining would be done; they are hacked by
adding an alloca in the caller, preserving the point of
the tests.

llvm-svn: 61946
Dale Johannesen 2009-01-08 21:45:23 +00:00
parent 51e12762d5
commit 4c25cb12ea
5 changed files with 70 additions and 4 deletions
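
For context, a "dynamic alloca" here is a stack allocation whose size is not a compile-time constant, so it lowers to an alloca instruction with a non-constant array size. Below is a minimal C++ sketch of the situation this patch targets; it is not part of the commit, and it assumes the non-standard <alloca.h> header is available on the host toolchain (glibc, BSD, macOS).

#include <alloca.h>   // non-standard header; assumed available on the host
#include <cstdio>
#include <cstring>

// Callee with a *dynamic* alloca: the allocation size depends on a runtime
// argument, so it becomes an alloca with a non-constant array size in IR.
void callee(int n) {
  char *buf = static_cast<char *>(alloca(n));
  std::memset(buf, 'x', n);
  std::printf("callee used %d dynamic stack bytes\n", n);
}

// Caller with only fixed-size locals (constant-sized, "static" allocas).
// With this patch the inliner refuses to inline callee() here: doing so would
// introduce dynamic stack adjustment (and, inside loops, llvm.stacksave /
// llvm.stackrestore bookkeeping) into a function that previously needed none.
void caller() {
  char fixed[16];                       // constant size: a static alloca
  std::memset(fixed, 0, sizeof fixed);
  for (int i = 0; i < 4; ++i)
    callee(32 + i);
}

int main() {
  caller();
  return 0;
}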

View File

@@ -78,6 +78,9 @@ namespace llvm {
     /// caller.
     bool NeverInline;

+    /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+    bool usesDynamicAlloca;
+
     /// NumInsts, NumBlocks - Keep track of how large each function is, which
     /// is used to estimate the code size cost of inlining it.
     unsigned NumInsts, NumBlocks;
@@ -93,8 +96,8 @@ namespace llvm {
     /// entry here.
     std::vector<ArgInfo> ArgumentWeights;

-    FunctionInfo() : NeverInline(false), NumInsts(0), NumBlocks(0),
-                     NumVectorInsts(0) {}
+    FunctionInfo() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0),
+                     NumBlocks(0), NumVectorInsts(0) {}

     /// analyzeFunction - Fill in the current structure with information
     /// gleaned from the specified function.

View File

@@ -126,6 +126,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
        NumInsts += 5;
      }

+     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+       if (!isa<ConstantInt>(AI->getArraySize()))
+         this->usesDynamicAlloca = true;
+     }
+
      if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
        ++NumVectorInsts;
@@ -173,7 +178,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
                               SmallPtrSet<const Function *, 16> &NeverInline) {
   Instruction *TheCall = CS.getInstruction();
   Function *Callee = CS.getCalledFunction();
-  const Function *Caller = TheCall->getParent()->getParent();
+  Function *Caller = TheCall->getParent()->getParent();

   // Don't inline a directly recursive call.
   if (Caller == Callee ||
@@ -219,11 +224,24 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
    // If we haven't calculated this information yet, do so now.
    if (CalleeFI.NumBlocks == 0)
      CalleeFI.analyzeFunction(Callee);

    // If we should never inline this, return a huge cost.
    if (CalleeFI.NeverInline)
      return InlineCost::getNever();

+   // Get information about the caller...
+   FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+   // If we haven't calculated this information yet, do so now.
+   if (CallerFI.NumBlocks == 0)
+     CallerFI.analyzeFunction(Caller);
+
+   // Don't inline a callee with dynamic alloca into a caller without them.
+   // Functions containing dynamic allocas are inefficient in various ways;
+   // don't create more inefficiency.
+   if (CalleeFI.usesDynamicAlloca && !CallerFI.usesDynamicAlloca)
+     return InlineCost::getNever();
+
    // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
    // could move this up and avoid computing the FunctionInfo for
    // things we are going to just return always inline for. This
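
Taken together, the change has two parts: analyzeFunction() sets usesDynamicAlloca when it sees an AllocaInst whose array size is not a ConstantInt, and getInlineCost() now also analyzes the caller and returns "never" when the callee uses dynamic alloca but the caller does not. The following standalone C++ sketch models that policy with simplified stand-in types; Function, FunctionInfo, and InlinePolicy below are mock-ups for illustration, not LLVM's real classes.

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Minimal stand-in for an alloca: only the property the heuristic cares about.
struct Alloca {
  bool constantSize;  // true => fixed-size ("static") alloca, false => dynamic
};

// Minimal stand-in for a function body.
struct Function {
  std::string name;
  std::vector<Alloca> allocas;
};

// Per-function summary, analogous to the patch's FunctionInfo.
struct FunctionInfo {
  bool analyzed = false;
  bool usesDynamicAlloca = false;
};

class InlinePolicy {
  std::unordered_map<const Function *, FunctionInfo> cache;

  // Analogous to analyzeFunction(): mark functions that contain an alloca
  // whose size is not a compile-time constant.
  FunctionInfo &analyze(const Function *F) {
    FunctionInfo &FI = cache[F];
    if (!FI.analyzed) {
      FI.analyzed = true;
      for (const Alloca &A : F->allocas)
        if (!A.constantSize)  // mirrors !isa<ConstantInt>(AI->getArraySize())
          FI.usesDynamicAlloca = true;
    }
    return FI;
  }

public:
  // Analogous to the new check in getInlineCost(): refuse to inline a callee
  // with a dynamic alloca into a caller that has none.
  bool mayInline(const Function *Caller, const Function *Callee) {
    FunctionInfo &CalleeFI = analyze(Callee);
    FunctionInfo &CallerFI = analyze(Caller);
    return !(CalleeFI.usesDynamicAlloca && !CallerFI.usesDynamicAlloca);
  }
};

int main() {
  Function a{"a", {{/*constantSize=*/false}}};  // has a dynamic alloca
  Function b{"b", {{/*constantSize=*/true}}};   // only fixed-size locals
  InlinePolicy P;
  std::printf("inline a into b? %s\n", P.mayInline(&b, &a) ? "yes" : "no");  // no
  std::printf("inline b into a? %s\n", P.mayInline(&a, &b) ? "yes" : "no");  // yes
  return 0;
}

The real patch caches FunctionInfo in CachedFunctionInfo and triggers analysis lazily via the NumBlocks == 0 check; the mock-up's "analyzed" flag plays that role here.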

View File

@@ -0,0 +1,36 @@
+; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call
+; Do not inline calls to variable-sized alloca.
+
+@q = common global i8* null		; <i8**> [#uses=1]
+
+define i8* @a(i32 %i) nounwind {
+entry:
+  %i_addr = alloca i32		; <i32*> [#uses=2]
+  %retval = alloca i8*		; <i8**> [#uses=1]
+  %p = alloca i8*		; <i8**> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+  store i32 %i, i32* %i_addr
+  %0 = load i32* %i_addr, align 4		; <i32> [#uses=1]
+  %1 = alloca i8, i32 %0		; <i8*> [#uses=1]
+  store i8* %1, i8** %p, align 4
+  %2 = load i8** %p, align 4		; <i8*> [#uses=1]
+  store i8* %2, i8** @q, align 4
+  br label %return
+
+return:		; preds = %entry
+  %retval1 = load i8** %retval		; <i8*> [#uses=1]
+  ret i8* %retval1
+}
+
+define void @b(i32 %i) nounwind {
+entry:
+  %i_addr = alloca i32		; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+  store i32 %i, i32* %i_addr
+  %0 = load i32* %i_addr, align 4		; <i32> [#uses=1]
+  %1 = call i8* @a(i32 %0) nounwind		; <i8*> [#uses=0]
+  br label %return
+
+return:		; preds = %entry
+  ret void
+}

View File

@@ -1,5 +1,7 @@
 ; Test that functions with dynamic allocas get inlined in a case where
 ; naively inlining it would result in a miscompilation.
+; Functions with dynamic allocas can only be inlined into functions that
+; already have dynamic allocas.

 ; RUN: llvm-as < %s | opt -inline | llvm-dis | \
 ; RUN:    grep llvm.stacksave
@@ -16,6 +18,8 @@ define internal void @callee(i32 %N) {

 define void @foo(i32 %N) {
 ; <label>:0
+  %P = alloca i32, i32 %N		; <i32*> [#uses=1]
+  call void @ext( i32* %P )
   br label %Loop

 Loop:		; preds = %Loop, %0

View File

@@ -636,6 +636,9 @@ lpad:		; preds = %entry, %invcont

 define internal fastcc i32 @ce3806g__fxio__put__4.1215(i8* %to.0, %struct.string___XUB* %to.1, i8 signext %item) {
 entry:
+  %P0 = load i32* @__gnat_all_others_value, align 4		; <i32*> [#uses=1]
+  %P = alloca i32, i32 %P0		; <i32*> [#uses=1]
+  call void @ext( i32* %P )
   %to_addr = alloca %struct.system__file_control_block__pstring		; <%struct.system__file_control_block__pstring*> [#uses=4]
   %FRAME.358 = alloca %struct.FRAME.ce3806g__fxio__put__4		; <%struct.FRAME.ce3806g__fxio__put__4*> [#uses=65]
   %0 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 0		; <i8**> [#uses=1]
@@ -1435,6 +1438,8 @@ declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output()

 declare void @report__failed(i8*, %struct.string___XUB*)

+declare void @ext(i32*)
+
 declare %struct.ada__text_io__text_afcb* @ada__text_io__delete(%struct.ada__text_io__text_afcb*)

 declare void @report__result()