
limit the number of instructions per block examined by dead store elimination

Summary: Dead store elimination gets very expensive when large numbers of instructions need to be analyzed. This patch limits the number of instructions analyzed per store to the value of the memdep-block-scan-limit parameter (which defaults to 100). This resulted in no observed difference in performance of the generated code, and no change in the statistics for the dead store elimination pass, but improved compilation time on some files by more than an order of magnitude.
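To illustrate the calling pattern this patch enables, here is a minimal caller-side sketch (not code taken verbatim from the patch; MD, Loc, DepWrite, and BB stand in for a MemoryDependenceResults pointer and the values a client such as dead store elimination already has in scope):

    // Fetch the default budget once, then thread the same counter through
    // every query so the whole dependency walk shares one scan limit.
    unsigned Limit = MD->getDefaultBlockScanLimit();  // 100 unless overridden
    MemDepResult Dep = MD->getPointerDependencyFrom(
        Loc, /*isLoad=*/false, DepWrite->getIterator(), &BB,
        /*QueryInst=*/nullptr, &Limit);
    // On return, Limit holds the number of instructions still allowed to be
    // examined; passing the same &Limit into the next query continues from
    // that budget instead of restarting at memdep-block-scan-limit each time.

Passing a null pointer for Limit keeps the old behavior, where each individual query gets the full memdep-block-scan-limit budget.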

Reviewers: dexonsmith, bruno, george.burgess.iv, dberlin, reames, davidxl

Subscribers: davide, chandlerc, dberlin, davidxl, eraman, tejohnson, mbodart, llvm-commits

Differential Revision: https://reviews.llvm.org/D15537

llvm-svn: 279833
Bob Haarman 2016-08-26 16:34:27 +00:00
parent 2c5173b70a
commit 7dc400a765
3 changed files with 41 additions and 11 deletions

include/llvm/Analysis/MemoryDependenceAnalysis.h

@@ -350,6 +350,11 @@ public:
                            DominatorTree &DT)
       : AA(AA), AC(AC), TLI(TLI), DT(DT) {}
 
+  /// Some methods limit the number of instructions they will examine.
+  /// The return value of this method is the default limit that will be
+  /// used if no limit is explicitly passed in.
+  unsigned getDefaultBlockScanLimit() const;
+
   /// Returns the instruction on which a memory operation depends.
   ///
   /// See the class comment for more details. It is illegal to call this on
@@ -409,19 +414,25 @@ public:
   /// operations. If isLoad is false, this routine ignores may-aliases
   /// with reads from read-only locations. If possible, pass the query
   /// instruction as well; this function may take advantage of the metadata
-  /// annotated to the query instruction to refine the result.
+  /// annotated to the query instruction to refine the result. \p Limit
+  /// can be used to set the maximum number of instructions that will be
+  /// examined to find the pointer dependency. On return, it will be set to
+  /// the number of instructions left to examine. If a null pointer is passed
+  /// in, the limit will default to the value of -memdep-block-scan-limit.
   ///
   /// Note that this is an uncached query, and thus may be inefficient.
   MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad,
                                         BasicBlock::iterator ScanIt,
                                         BasicBlock *BB,
-                                        Instruction *QueryInst = nullptr);
+                                        Instruction *QueryInst = nullptr,
+                                        unsigned *Limit = nullptr);
 
   MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
                                               bool isLoad,
                                               BasicBlock::iterator ScanIt,
                                               BasicBlock *BB,
-                                              Instruction *QueryInst);
+                                              Instruction *QueryInst,
+                                              unsigned *Limit = nullptr);
 
   /// This analysis looks for other loads and stores with invariant.group
   /// metadata and the same pointer operand. Returns Unknown if it does not

lib/Analysis/MemoryDependenceAnalysis.cpp

@@ -341,7 +341,7 @@ static bool isVolatile(Instruction *Inst) {
 
 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
-    BasicBlock *BB, Instruction *QueryInst) {
+    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
 
   if (QueryInst != nullptr) {
     if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -352,7 +352,8 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
         return invariantGroupDependency;
     }
   }
-  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+                                        Limit);
 }
 
 MemDepResult
@@ -408,12 +409,18 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
-    BasicBlock *BB, Instruction *QueryInst) {
+    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
 
   const Value *MemLocBase = nullptr;
   int64_t MemLocOffset = 0;
-  unsigned Limit = BlockScanLimit;
   bool isInvariantLoad = false;
 
+  if (!Limit) {
+    unsigned DefaultLimit = BlockScanLimit;
+    return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+                                          &DefaultLimit);
+  }
+
   // We must be careful with atomic accesses, as they may allow another thread
   // to touch this location, clobbering it. We are conservative: if the
   // QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -487,8 +494,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
 
     // Limit the amount of scanning we do so we don't end up with quadratic
     // running time on extreme testcases.
-    --Limit;
-    if (!Limit)
+    --*Limit;
+    if (!*Limit)
       return MemDepResult::getUnknown();
 
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -1712,6 +1719,10 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
 }
 
+unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
+  return BlockScanLimit;
+}
+
 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

lib/Transforms/Scalar/DeadStoreElimination.cpp

@@ -1050,6 +1050,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
     if (!Loc.Ptr)
       continue;
 
+    // Loop until we find a store we can eliminate or a load that
+    // invalidates the analysis. Without an upper bound on the number of
+    // instructions examined, this analysis can become very time-consuming.
+    // However, the potential gain diminishes as we process more instructions
+    // without eliminating any of them. Therefore, we limit the number of
+    // instructions we look at.
+    auto Limit = MD->getDefaultBlockScanLimit();
     while (InstDep.isDef() || InstDep.isClobber()) {
       // Get the memory clobbered by the instruction we depend on. MemDep will
       // skip any instructions that 'Loc' clearly doesn't interact with. If we
@@ -1138,8 +1145,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
       if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
         break;
 
-      InstDep = MD->getPointerDependencyFrom(Loc, false,
-                                             DepWrite->getIterator(), &BB);
+      InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
+                                             DepWrite->getIterator(), &BB,
+                                             /*QueryInst=*/ nullptr, &Limit);
     }
   }