mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Do not inline functions with a (dynamic) alloca into functions that don't already have a (dynamic) alloca.
Dynamic allocas cause inefficient codegen, and we shouldn't propagate that inefficiency into callers that do not already have it (this behavior follows gcc). Two existing tests assumed such inlining would be done; they are adjusted by adding a dynamic alloca in the caller, which preserves the point of the tests. llvm-svn: 61946
This commit is contained in:
parent
51e12762d5
commit
4c25cb12ea
@ -78,6 +78,9 @@ namespace llvm {
|
|||||||
/// caller.
|
/// caller.
|
||||||
bool NeverInline;
|
bool NeverInline;
|
||||||
|
|
||||||
|
/// usesDynamicAlloca - True if this function calls alloca (in the C sense).
|
||||||
|
bool usesDynamicAlloca;
|
||||||
|
|
||||||
/// NumInsts, NumBlocks - Keep track of how large each function is, which
|
/// NumInsts, NumBlocks - Keep track of how large each function is, which
|
||||||
/// is used to estimate the code size cost of inlining it.
|
/// is used to estimate the code size cost of inlining it.
|
||||||
unsigned NumInsts, NumBlocks;
|
unsigned NumInsts, NumBlocks;
|
||||||
@ -93,8 +96,8 @@ namespace llvm {
|
|||||||
/// entry here.
|
/// entry here.
|
||||||
std::vector<ArgInfo> ArgumentWeights;
|
std::vector<ArgInfo> ArgumentWeights;
|
||||||
|
|
||||||
FunctionInfo() : NeverInline(false), NumInsts(0), NumBlocks(0),
|
FunctionInfo() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0),
|
||||||
NumVectorInsts(0) {}
|
NumBlocks(0), NumVectorInsts(0) {}
|
||||||
|
|
||||||
/// analyzeFunction - Fill in the current structure with information
|
/// analyzeFunction - Fill in the current structure with information
|
||||||
/// gleaned from the specified function.
|
/// gleaned from the specified function.
|
||||||
|
@ -126,6 +126,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
|
|||||||
NumInsts += 5;
|
NumInsts += 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
|
||||||
|
if (!isa<ConstantInt>(AI->getArraySize()))
|
||||||
|
this->usesDynamicAlloca = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
|
if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
|
||||||
++NumVectorInsts;
|
++NumVectorInsts;
|
||||||
|
|
||||||
@ -173,7 +178,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
|
|||||||
SmallPtrSet<const Function *, 16> &NeverInline) {
|
SmallPtrSet<const Function *, 16> &NeverInline) {
|
||||||
Instruction *TheCall = CS.getInstruction();
|
Instruction *TheCall = CS.getInstruction();
|
||||||
Function *Callee = CS.getCalledFunction();
|
Function *Callee = CS.getCalledFunction();
|
||||||
const Function *Caller = TheCall->getParent()->getParent();
|
Function *Caller = TheCall->getParent()->getParent();
|
||||||
|
|
||||||
// Don't inline a directly recursive call.
|
// Don't inline a directly recursive call.
|
||||||
if (Caller == Callee ||
|
if (Caller == Callee ||
|
||||||
@ -219,11 +224,24 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
|
|||||||
// If we haven't calculated this information yet, do so now.
|
// If we haven't calculated this information yet, do so now.
|
||||||
if (CalleeFI.NumBlocks == 0)
|
if (CalleeFI.NumBlocks == 0)
|
||||||
CalleeFI.analyzeFunction(Callee);
|
CalleeFI.analyzeFunction(Callee);
|
||||||
|
|
||||||
// If we should never inline this, return a huge cost.
|
// If we should never inline this, return a huge cost.
|
||||||
if (CalleeFI.NeverInline)
|
if (CalleeFI.NeverInline)
|
||||||
return InlineCost::getNever();
|
return InlineCost::getNever();
|
||||||
|
|
||||||
|
// Get information about the caller...
|
||||||
|
FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
|
||||||
|
|
||||||
|
// If we haven't calculated this information yet, do so now.
|
||||||
|
if (CallerFI.NumBlocks == 0)
|
||||||
|
CallerFI.analyzeFunction(Caller);
|
||||||
|
|
||||||
|
// Don't inline a callee with dynamic alloca into a caller without them.
|
||||||
|
// Functions containing dynamic allocas are inefficient in various ways;
|
||||||
|
// don't create more inefficiency.
|
||||||
|
if (CalleeFI.usesDynamicAlloca && !CallerFI.usesDynamicAlloca)
|
||||||
|
return InlineCost::getNever();
|
||||||
|
|
||||||
// FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
|
// FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
|
||||||
// could move this up and avoid computing the FunctionInfo for
|
// could move this up and avoid computing the FunctionInfo for
|
||||||
// things we are going to just return always inline for. This
|
// things we are going to just return always inline for. This
|
||||||
|
36
test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
Normal file
36
test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
; RUN: llvm-as < %s | opt -inline | llvm-dis | grep call
|
||||||
|
; Do not inline calls to functions that contain a variable-sized alloca.
|
||||||
|
|
||||||
|
@q = common global i8* null ; <i8**> [#uses=1]
|
||||||
|
|
||||||
|
define i8* @a(i32 %i) nounwind {
|
||||||
|
entry:
|
||||||
|
%i_addr = alloca i32 ; <i32*> [#uses=2]
|
||||||
|
%retval = alloca i8* ; <i8**> [#uses=1]
|
||||||
|
%p = alloca i8* ; <i8**> [#uses=2]
|
||||||
|
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
|
||||||
|
store i32 %i, i32* %i_addr
|
||||||
|
%0 = load i32* %i_addr, align 4 ; <i32> [#uses=1]
|
||||||
|
%1 = alloca i8, i32 %0 ; <i8*> [#uses=1]
|
||||||
|
store i8* %1, i8** %p, align 4
|
||||||
|
%2 = load i8** %p, align 4 ; <i8*> [#uses=1]
|
||||||
|
store i8* %2, i8** @q, align 4
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return: ; preds = %entry
|
||||||
|
%retval1 = load i8** %retval ; <i8*> [#uses=1]
|
||||||
|
ret i8* %retval1
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @b(i32 %i) nounwind {
|
||||||
|
entry:
|
||||||
|
%i_addr = alloca i32 ; <i32*> [#uses=2]
|
||||||
|
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
|
||||||
|
store i32 %i, i32* %i_addr
|
||||||
|
%0 = load i32* %i_addr, align 4 ; <i32> [#uses=1]
|
||||||
|
%1 = call i8* @a(i32 %0) nounwind ; <i8*> [#uses=0]
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return: ; preds = %entry
|
||||||
|
ret void
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
; Test that functions with dynamic allocas get inlined in a case where
|
; Test that functions with dynamic allocas get inlined in a case where
|
||||||
; naively inlining it would result in a miscompilation.
|
; naively inlining it would result in a miscompilation.
|
||||||
|
; Functions with dynamic allocas can only be inlined into functions that
|
||||||
|
; already have dynamic allocas.
|
||||||
|
|
||||||
; RUN: llvm-as < %s | opt -inline | llvm-dis | \
|
; RUN: llvm-as < %s | opt -inline | llvm-dis | \
|
||||||
; RUN: grep llvm.stacksave
|
; RUN: grep llvm.stacksave
|
||||||
@ -16,6 +18,8 @@ define internal void @callee(i32 %N) {
|
|||||||
|
|
||||||
define void @foo(i32 %N) {
|
define void @foo(i32 %N) {
|
||||||
; <label>:0
|
; <label>:0
|
||||||
|
%P = alloca i32, i32 %N ; <i32*> [#uses=1]
|
||||||
|
call void @ext( i32* %P )
|
||||||
br label %Loop
|
br label %Loop
|
||||||
|
|
||||||
Loop: ; preds = %Loop, %0
|
Loop: ; preds = %Loop, %0
|
||||||
|
@ -636,6 +636,9 @@ lpad: ; preds = %entry, %invcont
|
|||||||
|
|
||||||
define internal fastcc i32 @ce3806g__fxio__put__4.1215(i8* %to.0, %struct.string___XUB* %to.1, i8 signext %item) {
|
define internal fastcc i32 @ce3806g__fxio__put__4.1215(i8* %to.0, %struct.string___XUB* %to.1, i8 signext %item) {
|
||||||
entry:
|
entry:
|
||||||
|
%P0 = load i32 * @__gnat_all_others_value, align 4 ; <i32*> [#uses=1]
|
||||||
|
%P = alloca i32, i32 %P0 ; <i32*> [#uses=1]
|
||||||
|
call void @ext( i32* %P )
|
||||||
%to_addr = alloca %struct.system__file_control_block__pstring ; <%struct.system__file_control_block__pstring*> [#uses=4]
|
%to_addr = alloca %struct.system__file_control_block__pstring ; <%struct.system__file_control_block__pstring*> [#uses=4]
|
||||||
%FRAME.358 = alloca %struct.FRAME.ce3806g__fxio__put__4 ; <%struct.FRAME.ce3806g__fxio__put__4*> [#uses=65]
|
%FRAME.358 = alloca %struct.FRAME.ce3806g__fxio__put__4 ; <%struct.FRAME.ce3806g__fxio__put__4*> [#uses=65]
|
||||||
%0 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 0 ; <i8**> [#uses=1]
|
%0 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 0 ; <i8**> [#uses=1]
|
||||||
@ -1435,6 +1438,8 @@ declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output()
|
|||||||
|
|
||||||
declare void @report__failed(i8*, %struct.string___XUB*)
|
declare void @report__failed(i8*, %struct.string___XUB*)
|
||||||
|
|
||||||
|
declare void @ext(i32*)
|
||||||
|
|
||||||
declare %struct.ada__text_io__text_afcb* @ada__text_io__delete(%struct.ada__text_io__text_afcb*)
|
declare %struct.ada__text_io__text_afcb* @ada__text_io__delete(%struct.ada__text_io__text_afcb*)
|
||||||
|
|
||||||
declare void @report__result()
|
declare void @report__result()
|
||||||
|
Loading…
Reference in New Issue
Block a user