diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dfe4b97812c..d1c91a2a492 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -212,15 +212,148 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
   return Reduction;
 }
 
+static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
+                                                ICmpInst *ICI) {
+  unsigned Reduction = 0;
+
+  // Bail if this is comparing against a non-constant; there is nothing we can
+  // do there.
+  if (!isa<Constant>(ICI->getOperand(1)))
+    return Reduction;
+
+  // An icmp pred (alloca, C) becomes true if the predicate is true when
+  // equal and false otherwise.
+  bool Result = ICI->isTrueWhenEqual();
+
+  SmallVector<Instruction *, 4> Worklist;
+  Worklist.push_back(ICI);
+  do {
+    Instruction *U = Worklist.pop_back_val();
+    Reduction += InlineConstants::InstrCost;
+    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+         UI != UE; ++UI) {
+      Instruction *I = dyn_cast<Instruction>(*UI);
+      if (!I || I->mayHaveSideEffects()) continue;
+      if (I->getNumOperands() == 1)
+        Worklist.push_back(I);
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+        // If BO produces the same value as U, then the other operand is
+        // irrelevant and we can put it into the Worklist to continue
+        // deleting dead instructions. If BO produces the same value as the
+        // other operand, we can delete BO but that's it.
+        if (Result == true) {
+          if (BO->getOpcode() == Instruction::Or)
+            Worklist.push_back(I);
+          if (BO->getOpcode() == Instruction::And)
+            Reduction += InlineConstants::InstrCost;
+        } else {
+          if (BO->getOpcode() == Instruction::Or ||
+              BO->getOpcode() == Instruction::Xor)
+            Reduction += InlineConstants::InstrCost;
+          if (BO->getOpcode() == Instruction::And)
+            Worklist.push_back(I);
+        }
+      }
+      if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+        BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
+        if (BB->getSinglePredecessor())
+          Reduction
+            += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
+      }
+    }
+  } while (!Worklist.empty());
+
+  return Reduction;
+}
+
+/// \brief Compute the reduction possible for a given instruction if we are
+/// able to SROA an alloca.
+///
+/// The reduction for this instruction is added to the SROAReduction output
+/// parameter. Returns false if this instruction is expected to defeat SROA in
+/// general.
+bool countCodeReductionForSROAInst(Instruction *I,
+                                   SmallVectorImpl<Value *> &Worklist,
+                                   unsigned &SROAReduction) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!SI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+    // If the GEP has variable indices, we won't be able to do much with it.
+    if (!GEP->hasAllConstantIndices())
+      return false;
+    // A non-zero GEP will likely become a mask operation after SROA.
+    if (GEP->hasAllZeroIndices())
+      SROAReduction += InlineConstants::InstrCost;
+    Worklist.push_back(GEP);
+    return true;
+  }
+
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+    // Track pointer through bitcasts.
+    Worklist.push_back(BCI);
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  // We just look for non-constant operands to ICmp instructions as those will
+  // defeat SROA. The actual reduction for these happens even without SROA.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+    return isa<Constant>(ICI->getOperand(1));
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+    // SROA can handle a select of alloca iff all uses of the alloca are
+    // loads, and dereferenceable. We assume it's dereferenceable since
+    // we're told the input is an alloca.
+    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+         UI != UE; ++UI) {
+      LoadInst *LI = dyn_cast<LoadInst>(*UI);
+      if (LI == 0 || !LI->isSimple())
+        return false;
+    }
+    // We don't know whether we'll be deleting the rest of the chain of
+    // instructions from the SelectInst on, because we don't know whether
+    // the other side of the select is also an alloca or not.
+    return true;
+  }
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      // SROA can usually chew through these intrinsics.
+      SROAReduction += InlineConstants::InstrCost;
+      return true;
+    }
+  }
+
+  // If there is some other strange instruction, we're not going to be
+  // able to do much if we inline this.
+  return false;
+}
+
 unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
     const CodeMetrics &Metrics, Value *V) {
   if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
   unsigned Reduction = 0;
-
-  // Looking at ICmpInsts will never abort the analysis and return zero, and
-  // analyzing them is expensive, so save them for last so that we don't do
-  // extra work that we end up throwing out.
-  SmallVector<ICmpInst *, 4> ICmpInsts;
+  unsigned SROAReduction = 0;
+  bool CanSROAAlloca = true;
 
   SmallVector<Value *, 4> Worklist;
   Worklist.push_back(V);
@@ -229,111 +362,17 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
     for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
          UI != E; ++UI){
       Instruction *I = cast<Instruction>(*UI);
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        if (!LI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        if (!SI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-        // If the GEP has variable indices, we won't be able to do much with it.
-        if (!GEP->hasAllConstantIndices())
-          return 0;
-        // A non-zero GEP will likely become a mask operation after SROA.
-        if (GEP->hasAllZeroIndices())
-          Reduction += InlineConstants::InstrCost;
-        Worklist.push_back(GEP);
-      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-        // Track pointer through bitcasts.
-        Worklist.push_back(BCI);
-        Reduction += InlineConstants::InstrCost;
-      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-        // SROA can handle a select of alloca iff all uses of the alloca are
-        // loads, and dereferenceable. We assume it's dereferenceable since
-        // we're told the input is an alloca.
-        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
-             UI != UE; ++UI) {
-          LoadInst *LI = dyn_cast<LoadInst>(*UI);
-          if (LI == 0 || !LI->isSimple()) return 0;
-        }
-        // We don't know whether we'll be deleting the rest of the chain of
-        // instructions from the SelectInst on, because we don't know whether
-        // the other side of the select is also an alloca or not.
-        continue;
-      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-        switch (II->getIntrinsicID()) {
-        default:
-          return 0;
-        case Intrinsic::memset:
-        case Intrinsic::memcpy:
-        case Intrinsic::memmove:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-          // SROA can usually chew through these intrinsics.
-          Reduction += InlineConstants::InstrCost;
-          break;
-        }
-      } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
-        if (!isa<Constant>(ICI->getOperand(1)))
-          return 0;
-        ICmpInsts.push_back(ICI);
-      } else {
-        // If there is some other strange instruction, we're not going to be
-        // able to do much if we inline this.
-        return 0;
-      }
+
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+        Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+
+      if (CanSROAAlloca)
+        CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
+                                                      SROAReduction);
     }
   } while (!Worklist.empty());
 
-  while (!ICmpInsts.empty()) {
-    ICmpInst *ICI = ICmpInsts.pop_back_val();
-
-    // An icmp pred (alloca, C) becomes true if the predicate is true when
-    // equal and false otherwise.
-    bool Result = ICI->isTrueWhenEqual();
-
-    SmallVector<Instruction *, 4> Worklist;
-    Worklist.push_back(ICI);
-    do {
-      Instruction *U = Worklist.pop_back_val();
-      Reduction += InlineConstants::InstrCost;
-      for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-           UI != UE; ++UI) {
-        Instruction *I = dyn_cast<Instruction>(*UI);
-        if (!I || I->mayHaveSideEffects()) continue;
-        if (I->getNumOperands() == 1)
-          Worklist.push_back(I);
-        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-          // If BO produces the same value as U, then the other operand is
-          // irrelevant and we can put it into the Worklist to continue
-          // deleting dead instructions. If BO produces the same value as the
-          // other operand, we can delete BO but that's it.
-          if (Result == true) {
-            if (BO->getOpcode() == Instruction::Or)
-              Worklist.push_back(I);
-            if (BO->getOpcode() == Instruction::And)
-              Reduction += InlineConstants::InstrCost;
-          } else {
-            if (BO->getOpcode() == Instruction::Or ||
-                BO->getOpcode() == Instruction::Xor)
-              Reduction += InlineConstants::InstrCost;
-            if (BO->getOpcode() == Instruction::And)
-              Worklist.push_back(I);
-          }
-        }
-        if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-          BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
-          if (BB->getSinglePredecessor())
-            Reduction
-              += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
-        }
-      }
-    } while (!Worklist.empty());
-  }
-
-  return Reduction;
+  return Reduction + (CanSROAAlloca ? SROAReduction : 0);
 }
 
 /// analyzeFunction - Fill in the current structure with information gleaned
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
index 91ab40ae163..fb4062f7083 100644
--- a/test/Transforms/Inline/alloca-bonus.ll
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -81,3 +81,44 @@ bb.true:
 bb.false:
   ret void
 }
+
+define void @outer4(i32 %A) {
+; CHECK: @outer4
+; CHECK-NOT: call void @inner4
+  %ptr = alloca i32
+  call void @inner4(i32* %ptr, i32 %A)
+  ret void
+}
+
+; %B poisons this call, scalar-repl can't handle that instruction. However, we
+; still want to detect that the icmp and branch *can* be handled.
+define void @inner4(i32 *%ptr, i32 %A) {
+  %B = getelementptr i32* %ptr, i32 %A
+  %E = icmp eq i32* %ptr, null
+  br i1 %E, label %bb.true, label %bb.false
+bb.true:
+  ; This block mustn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}