1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

Fix PR24469 resulting from r245025 and re-enable dead store elimination across basicblocks.

PR24469 resulted because DeleteDeadInstruction in handleNonLocalStoreDeletion was
deleting the next basic block iterator. Fixed the same by resetting the basic block iterator
post call to DeleteDeadInstruction.

llvm-svn: 245195
This commit is contained in:
Karthik Bhat 2015-08-17 05:51:39 +00:00
parent fc4172a950
commit 334796e2f8
3 changed files with 438 additions and 50 deletions

View File

@ -16,13 +16,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@ -42,6 +45,7 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumCrossBlockStores, "Number of cross block stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
namespace {
@ -49,12 +53,41 @@ namespace {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
PostDominatorTree *PDT;
const TargetLibraryInfo *TLI;
SmallVector<SmallVector<StoreInst *, 8>, 16> Candidates;
SetVector<StoreInst *> DeadStores;
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32>
BackEdges;
DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> BackEdgesMap;
static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
DSE()
: FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr),
PDT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
// Return all stores in a given BasicBlock.
SmallVector<StoreInst *, 8> getStores(BasicBlock *BB) {
SmallVector<StoreInst *, 8> VecStores;
for (auto &BI : *BB) {
if (StoreInst *SI = dyn_cast<StoreInst>(&BI))
VecStores.push_back(SI);
}
return VecStores;
}
// Get dfs in/out on the PDT and populate Candidates store list which
// is used to find potential dead stores for a given block
void populateCandidateStores(Function &F) {
for (auto &I : F) {
DomTreeNode *DTNode = PDT->getNode(&I);
if (!DTNode)
continue;
int DFSIn = DTNode->getDFSNumIn();
SmallVector<StoreInst *, 8> VecStores = getStores(&I);
Candidates[DFSIn] = VecStores;
}
}
bool runOnFunction(Function &F) override {
if (skipOptnoneFunction(F))
@ -64,7 +97,21 @@ namespace {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
PDT = &getAnalysis<PostDominatorTree>();
if (PDT->getRootNode()) {
int Count = PDT->getRootNode()->getDFSNumOut();
SmallVector<StoreInst *, 8> VecStores;
Candidates.resize(Count + 1);
Candidates.assign(Count + 1, VecStores);
// If we have more than 1 block try to populate candidate store.
if (Count > 1) {
populateCandidateStores(F);
FindFunctionBackedges(F, BackEdges);
for (auto I : BackEdges)
BackEdgesMap.insert(I);
}
}
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
@ -83,16 +130,24 @@ namespace {
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
SmallSetVector<Value *, 16> &DeadStackObjects,
const DataLayout &DL);
void handleNonLocalStoreDeletion(StoreInst *SI, BasicBlock::iterator &BBI,
BasicBlock &CurBlock);
bool isSafeCandidateForDeletion(BasicBlock *SrcBlock, BasicBlock *SinkBlock,
StoreInst *SI);
void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value *, 16> *ValueSet = nullptr);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<PostDominatorTree>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<PostDominatorTree>();
}
};
}
@ -102,6 +157,7 @@ INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
@ -111,50 +167,6 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
// Helper functions
//===----------------------------------------------------------------------===//
/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
--NumFastOther;
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
DeadInst->eraseFromParent();
if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
@ -527,10 +539,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemDepResult InstDep = MD->getDependency(Inst);
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
if (!InstDep.isDef() && !InstDep.isClobber())
if (!InstDep.isDef() && !InstDep.isClobber() && !InstDep.isNonLocal())
continue;
if (InstDep.isNonLocal()) {
if (!PDT->getRootNode())
continue;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
handleNonLocalStoreDeletion(SI, BBI, BB);
continue;
}
// Figure out what location is being stored to.
MemoryLocation Loc = getLocForWrite(Inst, *AA);
@ -704,6 +721,50 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
}
}
/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
/// If ValueSet is non-null, remove any deleted instructions from it as well.
void DSE::DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value *, 16> *ValueSet) {
SmallVector<Instruction *, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
--NumFastOther;
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
if (StoreInst *SI = dyn_cast<StoreInst>(DeadInst))
DeadStores.insert(SI);
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty())
continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
DeadInst->eraseFromParent();
if (ValueSet)
ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
@ -931,3 +992,122 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
/// isSafeCandidateForDeletion- Check all paths from the SrcBlock till
/// SinkBlock to see if Store 'SI' is safe to be remove.
/// Returns true if the candidate store SI is safe to delete
/// else returns false.
bool DSE::isSafeCandidateForDeletion(BasicBlock *SrcBlock,
BasicBlock *SinkBlock, StoreInst *SI) {
SmallVector<BasicBlock *, 16> WorkList;
SmallPtrSet<BasicBlock *, 8> Visited;
BasicBlock::iterator BBI(SI);
// Check from the store till end of block and make sure we have no references
// to memory stored by this Store Instruction.
for (auto BI = ++BBI, BE = SrcBlock->end(); BI != BE; ++BI) {
Instruction *I = BI;
StoreInst *CSI = dyn_cast<StoreInst>(I);
if (CSI) {
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CSI));
if (R == MustAlias)
return true;
} else {
ModRefInfo Res = AA->getModRefInfo(I, MemoryLocation::get(SI));
if (Res != MRI_NoModRef)
return false;
}
}
// Add successors of the block to stack and start DFS.
for (succ_iterator I = succ_begin(SrcBlock), E = succ_end(SrcBlock); I != E;
++I) {
if (!Visited.insert(*I).second)
continue;
// A path with backedge may not be safe. Conservatively mark
// this store unsafe.
if (BackEdgesMap.count(std::make_pair(SrcBlock, *I)))
return false;
WorkList.push_back(*I);
}
while (!WorkList.empty()) {
BasicBlock *B = WorkList.pop_back_val();
auto BI = B->begin();
auto BE = B->end();
for (; BI != BE; ++BI) {
Instruction *I = BI;
StoreInst *CSI = dyn_cast<StoreInst>(I);
if (CSI) {
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CSI));
if (R == MustAlias)
break;
} else {
ModRefInfo Res = AA->getModRefInfo(I, MemoryLocation::get(SI));
if (Res != MRI_NoModRef)
return false;
}
}
// If we reached the sink node or we found a block which has a stores that
// overwrites the candidate block we need not look at their successors.
if (B == SinkBlock || BI != BE)
continue;
for (succ_iterator I = succ_begin(B), E = succ_end(B); I != E; ++I) {
if (!Visited.insert(*I).second)
continue;
// A path with backedge may not be safe.Conservatively mark
// this store unsafe.
if (BackEdgesMap.count(std::make_pair(B, *I)))
return false;
WorkList.push_back(*I);
}
}
return true;
}
/// handleNonLocalStoreDeletion - Handle non local dead store elimination.
/// This works by finding candidate stores using PDT and then running DFS
/// from candidate store block checking all paths to make sure the store is
/// safe to delete.
void DSE::handleNonLocalStoreDeletion(StoreInst *SI, BasicBlock::iterator &BBI,
BasicBlock &CurBlock) {
BasicBlock *BB = SI->getParent();
Value *Pointer = SI->getPointerOperand();
DomTreeNode *DTNode = PDT->getNode(BB);
if (!DTNode)
return;
int DFSNumIn = DTNode->getDFSNumIn();
int DFSNumOut = DTNode->getDFSNumOut();
for (int i = DFSNumIn + 1; i < DFSNumOut; ++i) {
for (auto &I : Candidates[i]) {
StoreInst *CandidateSI = I;
if (DeadStores.count(CandidateSI))
continue;
Value *MemPtr = CandidateSI->getPointerOperand();
if (!MemPtr)
continue;
if (Pointer->getType() != MemPtr->getType())
continue;
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CandidateSI));
if (R != MustAlias)
continue;
if (isSafeCandidateForDeletion(CandidateSI->getParent(), BB,
CandidateSI)) {
DeleteDeadInstruction(CandidateSI, *MD, *TLI);
++NumCrossBlockStores;
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = SI;
if (BBI != CurBlock.begin())
--BBI;
}
}
}
}

View File

@ -0,0 +1,104 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@x = common global i32 0
@y = common global i32 0
@a = external global i32
@b = external global i32
define void @test_01(i32 %N) {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
; <label>:4
store i32 5, i32* @x
br label %5
; <label>:5
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_01(
; CHECK-NOT: store i32 10, i32* @x
; CHECK-NOT: store i32 5, i32* @x
; CHECK: store i32 15, i32* @x
define void @test_02(i32 %N) {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
; <label>:4
store i32 5, i32* @x
br label %7
; <label>:5
%6 = load i32, i32* @x
store i32 %6, i32* @y
br label %7
; <label>:7
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_02(
; CHECK: store i32 10, i32* @x
; CHECK-NOT: store i32 5, i32* @x
; CHECK: store i32 %6, i32* @y
define void @test_03(i32 %N) #0 {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %6
; <label>:4 ; preds = %0
%5 = load i32, i32* @x
store i32 %5, i32* @y
br label %6
; <label>:6 ; preds = %4, %0
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_03(
; CHECK: store i32 10, i32* @x
; CHECK: store i32 %5, i32* @y
; CHECK: store i32 15, i32* @x
; Check we safely delete store i32 %g.02, i32* @b below.(PR24469)
define void @test_04() {
entry:
br i1 false, label %for.body, label %for.end.9
for.cond.1.loopexit: ; preds = %for.body
store i32 0, i32* @b
%0 = load i32, i32* @a
br i1 false, label %for.body, label %for.end.9
for.body: ; preds = %for.cond.1.loopexit, %entry
%g.02 = phi i32 [ undef, %entry ], [ %0, %for.cond.1.loopexit ]
store i32 %g.02, i32* @b
br label %for.cond.1.loopexit
for.end.9: ; preds = %for.cond.1.loopexit, %entry
ret void
}
; CHECK-LABEL: @test_04(
; CHECK-NOT: store i32 %g.02, i32* @b
; CHECK-NOT: %g.02 = phi i32 [ undef, %entry ], [ %0, %for.cond.1.loopexit ]
; CHECK-NOT: %0 = load i32, i32* @a

View File

@ -0,0 +1,104 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@A = common global [100 x i32] zeroinitializer, align 16
@x = common global i32 0
; Negative Test case-
;void foo(int N) {
; A[0] = N;
; for(int i=0;i<N;++i)
; A[i]+=i;
; A[0] = 10;
;}
;; Stores should not be optimized away.
define void @test_01(i32 %N) #0 {
%1 = alloca i32
%i = alloca i32
store i32 %N, i32* %1
%2 = load i32, i32* %1
store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
store i32 0, i32* %i
br label %3
; <label>:3 ; preds = %14, %0
%4 = load i32, i32* %i
%5 = load i32, i32* %1
%6 = icmp slt i32 %4, %5
br i1 %6, label %7, label %17
; <label>:7 ; preds = %3
%8 = load i32, i32* %i
%9 = load i32, i32* %i
%10 = sext i32 %9 to i64
%11 = getelementptr inbounds [100 x i32], [100 x i32]* @A, i32 0, i64 %10
%12 = load i32, i32* %11
%13 = add nsw i32 %12, %8
store i32 %13, i32* %11
br label %14
; <label>:14 ; preds = %7
%15 = load i32, i32* %i
%16 = add nsw i32 %15, 1
store i32 %16, i32* %i
br label %3
; <label>:17 ; preds = %3
store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
ret void
}
; CHECK-LABEL: @test_01(
; CHECK: store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; CHECK: store i32 %13, i32* %11
; CHECK: store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; Postive Test case-
;void foo(int N) {
; A[0] = N;
; for(int i=0;i<N;++i)
; A[i]=i;
; A[0] = 10;
;}
;; Stores should not be optimized away.
define void @test_02(i32 %N) #0 {
%1 = alloca i32
%i = alloca i32
store i32 %N, i32* %1
%2 = load i32, i32* %1
store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
store i32 0, i32* %i
br label %3
; <label>:3 ; preds = %12, %0
%4 = load i32, i32* %i
%5 = load i32, i32* %1
%6 = icmp slt i32 %4, %5
br i1 %6, label %7, label %15
; <label>:7 ; preds = %3
%8 = load i32, i32* %i
%9 = load i32, i32* %i
%10 = sext i32 %9 to i64
%11 = getelementptr inbounds [100 x i32], [100 x i32]* @A, i32 0, i64 %10
store i32 %8, i32* %11
br label %12
; <label>:12 ; preds = %7
%13 = load i32, i32* %i
%14 = add nsw i32 %13, 1
store i32 %14, i32* %i
br label %3
; <label>:15 ; preds = %3
store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
ret void
}
; CHECK-LABEL: @test_02(
; CHECK-NOT: store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; CHECK: store i32 %7, i32* %10
; CHECK: store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)