1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

Undo a previous restriction on the inline cost calculation which Nick

introduced. Specifically, there are cost reductions for all
constant-operand icmp instructions against an alloca, regardless of
whether the alloca will in fact be eligible for SROA. That means we
don't want to abort the icmp reduction computation when we abort the
SROA reduction computation. That in turn frees us from the need to keep
a separate worklist and defer the ICmp calculations.

Use this new-found freedom and some judicious function boundaries to
factor the innards of computing the cost factor of any given instruction
out of the loop over the instructions and into static helper functions.
This greatly simplifies the code, and hopefully makes it more clear what
is happening here.

Reviewed by Eric Christopher. There is some concern that we'd like to
ensure this doesn't get out of hand, and I plan to benchmark the effects
of this change over the next few days along with some further fixes to
the inline cost.

llvm-svn: 152368
This commit is contained in:
Chandler Carruth 2012-03-09 02:49:36 +00:00
parent a10cf5e1b9
commit 63f95ab839
2 changed files with 187 additions and 107 deletions

View File

@ -212,15 +212,148 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
return Reduction; return Reduction;
} }
static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
ICmpInst *ICI) {
unsigned Reduction = 0;
// Bail if this is comparing against a non-constant; there is nothing we can
// do there.
if (!isa<Constant>(ICI->getOperand(1)))
return Reduction;
// An icmp pred (alloca, C) becomes true if the predicate is true when
// equal and false otherwise.
bool Result = ICI->isTrueWhenEqual();
SmallVector<Instruction *, 4> Worklist;
Worklist.push_back(ICI);
do {
Instruction *U = Worklist.pop_back_val();
Reduction += InlineConstants::InstrCost;
for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
UI != UE; ++UI) {
Instruction *I = dyn_cast<Instruction>(*UI);
if (!I || I->mayHaveSideEffects()) continue;
if (I->getNumOperands() == 1)
Worklist.push_back(I);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
// If BO produces the same value as U, then the other operand is
// irrelevant and we can put it into the Worklist to continue
// deleting dead instructions. If BO produces the same value as the
// other operand, we can delete BO but that's it.
if (Result == true) {
if (BO->getOpcode() == Instruction::Or)
Worklist.push_back(I);
if (BO->getOpcode() == Instruction::And)
Reduction += InlineConstants::InstrCost;
} else {
if (BO->getOpcode() == Instruction::Or ||
BO->getOpcode() == Instruction::Xor)
Reduction += InlineConstants::InstrCost;
if (BO->getOpcode() == Instruction::And)
Worklist.push_back(I);
}
}
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
if (BB->getSinglePredecessor())
Reduction
+= InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
}
}
} while (!Worklist.empty());
return Reduction;
}
/// \brief Compute the reduction possible for a given instruction if we are able
/// to SROA an alloca.
///
/// The reduction for this instruction is added to the SROAReduction output
/// parameter. Returns false if this instruction is expected to defeat SROA in
/// general.
/// \brief Compute the reduction possible for a given instruction if we are able
/// to SROA an alloca.
///
/// The reduction for this instruction is added to the SROAReduction output
/// parameter. Returns false if this instruction is expected to defeat SROA in
/// general.
///
/// Pointer-transparent instructions (GEPs with constant indices, bitcasts) are
/// pushed onto \p Worklist so the caller continues the walk through them.
static bool countCodeReductionForSROAInst(Instruction *I,
                                          SmallVectorImpl<Value *> &Worklist,
                                          unsigned &SROAReduction) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    // Only simple (non-volatile, non-atomic) loads are SROA-able.
    if (!LI->isSimple())
      return false;
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    // Likewise, only simple stores can be rewritten by SROA.
    if (!SI->isSimple())
      return false;
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // If the GEP has variable indices, we won't be able to do much with it.
    if (!GEP->hasAllConstantIndices())
      return false;
    // A non-zero GEP will likely become a mask operation after SROA.
    if (GEP->hasAllZeroIndices())
      SROAReduction += InlineConstants::InstrCost;
    Worklist.push_back(GEP);
    return true;
  }

  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
    // Track pointer through bitcasts.
    Worklist.push_back(BCI);
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  // We just look for non-constant operands to ICmp instructions as those will
  // defeat SROA. The actual reduction for these happens even without SROA.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
    return isa<Constant>(ICI->getOperand(1));

  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
    // SROA can handle a select of alloca iff all uses of the alloca are
    // loads, and dereferenceable. We assume it's dereferenceable since
    // we're told the input is an alloca.
    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
         UI != UE; ++UI) {
      LoadInst *LI = dyn_cast<LoadInst>(*UI);
      if (LI == 0 || !LI->isSimple())
        return false;
    }
    // We don't know whether we'll be deleting the rest of the chain of
    // instructions from the SelectInst on, because we don't know whether
    // the other side of the select is also an alloca or not.
    return true;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // SROA can usually chew through these intrinsics.
      SROAReduction += InlineConstants::InstrCost;
      return true;
    }
  }

  // If there is some other strange instruction, we're not going to be
  // able to do much if we inline this.
  return false;
}
unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca( unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
const CodeMetrics &Metrics, Value *V) { const CodeMetrics &Metrics, Value *V) {
if (!V->getType()->isPointerTy()) return 0; // Not a pointer if (!V->getType()->isPointerTy()) return 0; // Not a pointer
unsigned Reduction = 0; unsigned Reduction = 0;
unsigned SROAReduction = 0;
// Looking at ICmpInsts will never abort the analysis and return zero, and bool CanSROAAlloca = true;
// analyzing them is expensive, so save them for last so that we don't do
// extra work that we end up throwing out.
SmallVector<ICmpInst *, 4> ICmpInsts;
SmallVector<Value *, 4> Worklist; SmallVector<Value *, 4> Worklist;
Worklist.push_back(V); Worklist.push_back(V);
@ -229,111 +362,17 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI){ UI != E; ++UI){
Instruction *I = cast<Instruction>(*UI); Instruction *I = cast<Instruction>(*UI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isSimple()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
return 0; Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
Reduction += InlineConstants::InstrCost;
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (CanSROAAlloca)
if (!SI->isSimple()) CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
return 0; SROAReduction);
Reduction += InlineConstants::InstrCost;
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// If the GEP has variable indices, we won't be able to do much with it.
if (!GEP->hasAllConstantIndices())
return 0;
// A non-zero GEP will likely become a mask operation after SROA.
if (GEP->hasAllZeroIndices())
Reduction += InlineConstants::InstrCost;
Worklist.push_back(GEP);
} else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
// Track pointer through bitcasts.
Worklist.push_back(BCI);
Reduction += InlineConstants::InstrCost;
} else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
// SROA can handle a select of alloca iff all uses of the alloca are
// loads, and dereferenceable. We assume it's dereferenceable since
// we're told the input is an alloca.
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
if (LI == 0 || !LI->isSimple()) return 0;
}
// We don't know whether we'll be deleting the rest of the chain of
// instructions from the SelectInst on, because we don't know whether
// the other side of the select is also an alloca or not.
continue;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default:
return 0;
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
// SROA can usually chew through these intrinsics.
Reduction += InlineConstants::InstrCost;
break;
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<Constant>(ICI->getOperand(1)))
return 0;
ICmpInsts.push_back(ICI);
} else {
// If there is some other strange instruction, we're not going to be
// able to do much if we inline this.
return 0;
}
} }
} while (!Worklist.empty()); } while (!Worklist.empty());
while (!ICmpInsts.empty()) { return Reduction + (CanSROAAlloca ? SROAReduction : 0);
ICmpInst *ICI = ICmpInsts.pop_back_val();
// An icmp pred (alloca, C) becomes true if the predicate is true when
// equal and false otherwise.
bool Result = ICI->isTrueWhenEqual();
SmallVector<Instruction *, 4> Worklist;
Worklist.push_back(ICI);
do {
Instruction *U = Worklist.pop_back_val();
Reduction += InlineConstants::InstrCost;
for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
UI != UE; ++UI) {
Instruction *I = dyn_cast<Instruction>(*UI);
if (!I || I->mayHaveSideEffects()) continue;
if (I->getNumOperands() == 1)
Worklist.push_back(I);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
// If BO produces the same value as U, then the other operand is
// irrelevant and we can put it into the Worklist to continue
// deleting dead instructions. If BO produces the same value as the
// other operand, we can delete BO but that's it.
if (Result == true) {
if (BO->getOpcode() == Instruction::Or)
Worklist.push_back(I);
if (BO->getOpcode() == Instruction::And)
Reduction += InlineConstants::InstrCost;
} else {
if (BO->getOpcode() == Instruction::Or ||
BO->getOpcode() == Instruction::Xor)
Reduction += InlineConstants::InstrCost;
if (BO->getOpcode() == Instruction::And)
Worklist.push_back(I);
}
}
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
if (BB->getSinglePredecessor())
Reduction
+= InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
}
}
} while (!Worklist.empty());
}
return Reduction;
} }
/// analyzeFunction - Fill in the current structure with information gleaned /// analyzeFunction - Fill in the current structure with information gleaned

View File

@ -81,3 +81,44 @@ bb.true:
bb.false: bb.false:
ret void ret void
} }
; Driver: @inner4 should still be inlined here even though its alloca
; argument is poisoned for SROA (see @inner4 below).
define void @outer4(i32 %A) {
; CHECK: @outer4
; CHECK-NOT: call void @inner4
%ptr = alloca i32
call void @inner4(i32* %ptr, i32 %A)
ret void
}
; %B poisons this call, scalar-repl can't handle that instruction. However, we
; still want to detect that the icmp and branch *can* be handled.
define void @inner4(i32 *%ptr, i32 %A) {
%B = getelementptr i32* %ptr, i32 %A
%E = icmp eq i32* %ptr, null
br i1 %E, label %bb.true, label %bb.false
bb.true:
; This block mustn't be counted in the inline cost.
%t1 = load i32* %ptr
%t2 = add i32 %t1, 1
%t3 = add i32 %t2, 1
%t4 = add i32 %t3, 1
%t5 = add i32 %t4, 1
%t6 = add i32 %t5, 1
%t7 = add i32 %t6, 1
%t8 = add i32 %t7, 1
%t9 = add i32 %t8, 1
%t10 = add i32 %t9, 1
%t11 = add i32 %t10, 1
%t12 = add i32 %t11, 1
%t13 = add i32 %t12, 1
%t14 = add i32 %t13, 1
%t15 = add i32 %t14, 1
%t16 = add i32 %t15, 1
%t17 = add i32 %t16, 1
%t18 = add i32 %t17, 1
%t19 = add i32 %t18, 1
%t20 = add i32 %t19, 1
ret void
bb.false:
ret void
}