mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Re-reapply r221924: "[GVN] Perform Scalar PRE on gep indices that feed loads before
doing Load PRE" It's not really expected to stick around, last time it provoked a weird LTO build failure that I can't reproduce now, and the bot logs are long gone. I'll re-revert it if the failures recur. Original description: Perform Scalar PRE on gep indices that feed loads before doing Load PRE. llvm-svn: 225536
This commit is contained in:
parent
7363918430
commit
ef7d18507b
@ -20,6 +20,7 @@
|
||||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
@ -709,6 +710,7 @@ namespace {
|
||||
void dump(DenseMap<uint32_t, Value*> &d);
|
||||
bool iterateOnFunction(Function &F);
|
||||
bool performPRE(Function &F);
|
||||
bool performScalarPRE(Instruction *I);
|
||||
Value *findLeader(const BasicBlock *BB, uint32_t num);
|
||||
void cleanupGlobalSets();
|
||||
void verifyRemoved(const Instruction *I) const;
|
||||
@ -1728,6 +1730,15 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this load follows a GEP, see if we can PRE the indices before analyzing.
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
|
||||
for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
|
||||
OE = GEP->idx_end();
|
||||
OI != OE; ++OI)
|
||||
if (Instruction *I = dyn_cast<Instruction>(OI->get()))
|
||||
performScalarPRE(I);
|
||||
}
|
||||
|
||||
// Step 2: Analyze the availability of the load
|
||||
AvailValInBlkVect ValuesPerBlock;
|
||||
UnavailBlkVect UnavailableBlocks;
|
||||
@ -2430,175 +2441,182 @@ bool GVN::processBlock(BasicBlock *BB) {
|
||||
return ChangedFunction;
|
||||
}
|
||||
|
||||
bool GVN::performScalarPRE(Instruction *CurInst) {
|
||||
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
|
||||
|
||||
if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
|
||||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
|
||||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
|
||||
isa<DbgInfoIntrinsic>(CurInst))
|
||||
return false;
|
||||
|
||||
// Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
|
||||
// sinking the compare again, and it would force the code generator to
|
||||
// move the i1 from processor flags or predicate registers into a general
|
||||
// purpose register.
|
||||
if (isa<CmpInst>(CurInst))
|
||||
return false;
|
||||
|
||||
// We don't currently value number ANY inline asm calls.
|
||||
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
|
||||
if (CallI->isInlineAsm())
|
||||
return false;
|
||||
|
||||
uint32_t ValNo = VN.lookup(CurInst);
|
||||
|
||||
// Look for the predecessors for PRE opportunities. We're
|
||||
// only trying to solve the basic diamond case, where
|
||||
// a value is computed in the successor and one predecessor,
|
||||
// but not the other. We also explicitly disallow cases
|
||||
// where the successor is its own predecessor, because they're
|
||||
// more complicated to get right.
|
||||
unsigned NumWith = 0;
|
||||
unsigned NumWithout = 0;
|
||||
BasicBlock *PREPred = nullptr;
|
||||
BasicBlock *CurrentBlock = CurInst->getParent();
|
||||
predMap.clear();
|
||||
|
||||
for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
|
||||
PI != PE; ++PI) {
|
||||
BasicBlock *P = *PI;
|
||||
// We're not interested in PRE where the block is its
|
||||
// own predecessor, or in blocks with predecessors
|
||||
// that are not reachable.
|
||||
if (P == CurrentBlock) {
|
||||
NumWithout = 2;
|
||||
break;
|
||||
} else if (!DT->isReachableFromEntry(P)) {
|
||||
NumWithout = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
Value *predV = findLeader(P, ValNo);
|
||||
if (!predV) {
|
||||
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
|
||||
PREPred = P;
|
||||
++NumWithout;
|
||||
} else if (predV == CurInst) {
|
||||
/* CurInst dominates this predecessor. */
|
||||
NumWithout = 2;
|
||||
break;
|
||||
} else {
|
||||
predMap.push_back(std::make_pair(predV, P));
|
||||
++NumWith;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't do PRE when it might increase code size, i.e. when
|
||||
// we would need to insert instructions in more than one pred.
|
||||
if (NumWithout != 1 || NumWith == 0)
|
||||
return false;
|
||||
|
||||
// Don't do PRE across indirect branch.
|
||||
if (isa<IndirectBrInst>(PREPred->getTerminator()))
|
||||
return false;
|
||||
|
||||
// We can't do PRE safely on a critical edge, so instead we schedule
|
||||
// the edge to be split and perform the PRE the next time we iterate
|
||||
// on the function.
|
||||
unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
|
||||
if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
|
||||
toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Instantiate the expression in the predecessor that lacked it.
|
||||
// Because we are going top-down through the block, all value numbers
|
||||
// will be available in the predecessor by the time we need them. Any
|
||||
// that weren't originally present will have been instantiated earlier
|
||||
// in this loop.
|
||||
Instruction *PREInstr = CurInst->clone();
|
||||
bool success = true;
|
||||
for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
|
||||
Value *Op = PREInstr->getOperand(i);
|
||||
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
|
||||
continue;
|
||||
|
||||
if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
|
||||
PREInstr->setOperand(i, V);
|
||||
} else {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fail out if we encounter an operand that is not available in
|
||||
// the PRE predecessor. This is typically because of loads which
|
||||
// are not value numbered precisely.
|
||||
if (!success) {
|
||||
DEBUG(verifyRemoved(PREInstr));
|
||||
delete PREInstr;
|
||||
return false;
|
||||
}
|
||||
|
||||
PREInstr->insertBefore(PREPred->getTerminator());
|
||||
PREInstr->setName(CurInst->getName() + ".pre");
|
||||
PREInstr->setDebugLoc(CurInst->getDebugLoc());
|
||||
VN.add(PREInstr, ValNo);
|
||||
++NumGVNPRE;
|
||||
|
||||
// Update the availability map to include the new instruction.
|
||||
addToLeaderTable(ValNo, PREInstr, PREPred);
|
||||
|
||||
// Create a PHI to make the value available in this block.
|
||||
PHINode *Phi =
|
||||
PHINode::Create(CurInst->getType(), predMap.size(),
|
||||
CurInst->getName() + ".pre-phi", CurrentBlock->begin());
|
||||
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
|
||||
if (Value *V = predMap[i].first)
|
||||
Phi->addIncoming(V, predMap[i].second);
|
||||
else
|
||||
Phi->addIncoming(PREInstr, PREPred);
|
||||
}
|
||||
|
||||
VN.add(Phi, ValNo);
|
||||
addToLeaderTable(ValNo, Phi, CurrentBlock);
|
||||
Phi->setDebugLoc(CurInst->getDebugLoc());
|
||||
CurInst->replaceAllUsesWith(Phi);
|
||||
if (Phi->getType()->getScalarType()->isPointerTy()) {
|
||||
// Because we have added a PHI-use of the pointer value, it has now
|
||||
// "escaped" from alias analysis' perspective. We need to inform
|
||||
// AA of this.
|
||||
for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
|
||||
unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
|
||||
VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
|
||||
}
|
||||
|
||||
if (MD)
|
||||
MD->invalidateCachedPointerInfo(Phi);
|
||||
}
|
||||
VN.erase(CurInst);
|
||||
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
|
||||
|
||||
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
|
||||
if (MD)
|
||||
MD->removeInstruction(CurInst);
|
||||
DEBUG(verifyRemoved(CurInst));
|
||||
CurInst->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// performPRE - Perform a purely local form of PRE that looks for diamond
|
||||
/// control flow patterns and attempts to perform simple PRE at the join point.
|
||||
bool GVN::performPRE(Function &F) {
|
||||
bool Changed = false;
|
||||
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
|
||||
for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
|
||||
// Nothing to PRE in the entry block.
|
||||
if (CurrentBlock == &F.getEntryBlock()) continue;
|
||||
if (CurrentBlock == &F.getEntryBlock())
|
||||
continue;
|
||||
|
||||
// Don't perform PRE on a landing pad.
|
||||
if (CurrentBlock->isLandingPad()) continue;
|
||||
if (CurrentBlock->isLandingPad())
|
||||
continue;
|
||||
|
||||
for (BasicBlock::iterator BI = CurrentBlock->begin(),
|
||||
BE = CurrentBlock->end(); BI != BE; ) {
|
||||
BE = CurrentBlock->end();
|
||||
BI != BE;) {
|
||||
Instruction *CurInst = BI++;
|
||||
|
||||
if (isa<AllocaInst>(CurInst) ||
|
||||
isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
|
||||
CurInst->getType()->isVoidTy() ||
|
||||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
|
||||
isa<DbgInfoIntrinsic>(CurInst))
|
||||
continue;
|
||||
|
||||
// Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
|
||||
// sinking the compare again, and it would force the code generator to
|
||||
// move the i1 from processor flags or predicate registers into a general
|
||||
// purpose register.
|
||||
if (isa<CmpInst>(CurInst))
|
||||
continue;
|
||||
|
||||
// We don't currently value number ANY inline asm calls.
|
||||
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
|
||||
if (CallI->isInlineAsm())
|
||||
continue;
|
||||
|
||||
uint32_t ValNo = VN.lookup(CurInst);
|
||||
|
||||
// Look for the predecessors for PRE opportunities. We're
|
||||
// only trying to solve the basic diamond case, where
|
||||
// a value is computed in the successor and one predecessor,
|
||||
// but not the other. We also explicitly disallow cases
|
||||
// where the successor is its own predecessor, because they're
|
||||
// more complicated to get right.
|
||||
unsigned NumWith = 0;
|
||||
unsigned NumWithout = 0;
|
||||
BasicBlock *PREPred = nullptr;
|
||||
predMap.clear();
|
||||
|
||||
for (pred_iterator PI = pred_begin(CurrentBlock),
|
||||
PE = pred_end(CurrentBlock); PI != PE; ++PI) {
|
||||
BasicBlock *P = *PI;
|
||||
// We're not interested in PRE where the block is its
|
||||
// own predecessor, or in blocks with predecessors
|
||||
// that are not reachable.
|
||||
if (P == CurrentBlock) {
|
||||
NumWithout = 2;
|
||||
break;
|
||||
} else if (!DT->isReachableFromEntry(P)) {
|
||||
NumWithout = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
Value* predV = findLeader(P, ValNo);
|
||||
if (!predV) {
|
||||
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
|
||||
PREPred = P;
|
||||
++NumWithout;
|
||||
} else if (predV == CurInst) {
|
||||
/* CurInst dominates this predecessor. */
|
||||
NumWithout = 2;
|
||||
break;
|
||||
} else {
|
||||
predMap.push_back(std::make_pair(predV, P));
|
||||
++NumWith;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't do PRE when it might increase code size, i.e. when
|
||||
// we would need to insert instructions in more than one pred.
|
||||
if (NumWithout != 1 || NumWith == 0)
|
||||
continue;
|
||||
|
||||
// Don't do PRE across indirect branch.
|
||||
if (isa<IndirectBrInst>(PREPred->getTerminator()))
|
||||
continue;
|
||||
|
||||
// We can't do PRE safely on a critical edge, so instead we schedule
|
||||
// the edge to be split and perform the PRE the next time we iterate
|
||||
// on the function.
|
||||
unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
|
||||
if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
|
||||
toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Instantiate the expression in the predecessor that lacked it.
|
||||
// Because we are going top-down through the block, all value numbers
|
||||
// will be available in the predecessor by the time we need them. Any
|
||||
// that weren't originally present will have been instantiated earlier
|
||||
// in this loop.
|
||||
Instruction *PREInstr = CurInst->clone();
|
||||
bool success = true;
|
||||
for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
|
||||
Value *Op = PREInstr->getOperand(i);
|
||||
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
|
||||
continue;
|
||||
|
||||
if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
|
||||
PREInstr->setOperand(i, V);
|
||||
} else {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fail out if we encounter an operand that is not available in
|
||||
// the PRE predecessor. This is typically because of loads which
|
||||
// are not value numbered precisely.
|
||||
if (!success) {
|
||||
DEBUG(verifyRemoved(PREInstr));
|
||||
delete PREInstr;
|
||||
continue;
|
||||
}
|
||||
|
||||
PREInstr->insertBefore(PREPred->getTerminator());
|
||||
PREInstr->setName(CurInst->getName() + ".pre");
|
||||
PREInstr->setDebugLoc(CurInst->getDebugLoc());
|
||||
VN.add(PREInstr, ValNo);
|
||||
++NumGVNPRE;
|
||||
|
||||
// Update the availability map to include the new instruction.
|
||||
addToLeaderTable(ValNo, PREInstr, PREPred);
|
||||
|
||||
// Create a PHI to make the value available in this block.
|
||||
PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
|
||||
CurInst->getName() + ".pre-phi",
|
||||
CurrentBlock->begin());
|
||||
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
|
||||
if (Value *V = predMap[i].first)
|
||||
Phi->addIncoming(V, predMap[i].second);
|
||||
else
|
||||
Phi->addIncoming(PREInstr, PREPred);
|
||||
}
|
||||
|
||||
VN.add(Phi, ValNo);
|
||||
addToLeaderTable(ValNo, Phi, CurrentBlock);
|
||||
Phi->setDebugLoc(CurInst->getDebugLoc());
|
||||
CurInst->replaceAllUsesWith(Phi);
|
||||
if (Phi->getType()->getScalarType()->isPointerTy()) {
|
||||
// Because we have added a PHI-use of the pointer value, it has now
|
||||
// "escaped" from alias analysis' perspective. We need to inform
|
||||
// AA of this.
|
||||
for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
|
||||
++ii) {
|
||||
unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
|
||||
VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
|
||||
}
|
||||
|
||||
if (MD)
|
||||
MD->invalidateCachedPointerInfo(Phi);
|
||||
}
|
||||
VN.erase(CurInst);
|
||||
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
|
||||
|
||||
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
|
||||
if (MD) MD->removeInstruction(CurInst);
|
||||
DEBUG(verifyRemoved(CurInst));
|
||||
CurInst->eraseFromParent();
|
||||
Changed = true;
|
||||
Changed = performScalarPRE(CurInst);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2636,25 +2654,21 @@ bool GVN::iterateOnFunction(Function &F) {
|
||||
|
||||
// Top-down walk of the dominator tree
|
||||
bool Changed = false;
|
||||
#if 0
|
||||
// Needed for value numbering with phi construction to work.
|
||||
ReversePostOrderTraversal<Function*> RPOT(&F);
|
||||
for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
|
||||
RE = RPOT.end(); RI != RE; ++RI)
|
||||
Changed |= processBlock(*RI);
|
||||
#else
|
||||
// Save the blocks this function have before transformation begins. GVN may
|
||||
// split critical edge, and hence may invalidate the RPO/DT iterator.
|
||||
//
|
||||
std::vector<BasicBlock *> BBVect;
|
||||
BBVect.reserve(256);
|
||||
for (DomTreeNode *X : depth_first(DT->getRootNode()))
|
||||
BBVect.push_back(X->getBlock());
|
||||
// Needed for value numbering with phi construction to work.
|
||||
ReversePostOrderTraversal<Function *> RPOT(&F);
|
||||
for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
|
||||
RE = RPOT.end();
|
||||
RI != RE; ++RI)
|
||||
BBVect.push_back(*RI);
|
||||
|
||||
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
|
||||
I != E; I++)
|
||||
Changed |= processBlock(*I);
|
||||
#endif
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -46,12 +46,12 @@ entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
%t = load i32* %p, !tbaa !4
|
||||
%t = load i32* %p, !tbaa !3
|
||||
store i32 %t, i32* %q
|
||||
ret void
|
||||
|
||||
if.else:
|
||||
%u = load i32* %p, !tbaa !3
|
||||
%u = load i32* %p, !tbaa !4
|
||||
store i32 %u, i32* %q
|
||||
ret void
|
||||
}
|
||||
@ -61,11 +61,11 @@ if.else:
|
||||
|
||||
; CHECK: @watch_out_for_another_type_change
|
||||
; CHECK: if.then:
|
||||
; CHECK: %t = load i32* %p
|
||||
; CHECK: store i32 %t, i32* %q
|
||||
; CHECK: store i32 0, i32* %q
|
||||
; CHECK: ret void
|
||||
; CHECK: if.else:
|
||||
; CHECK: store i32 0, i32* %q
|
||||
; CHECK: %u = load i32* %p
|
||||
; CHECK: store i32 %u, i32* %q
|
||||
|
||||
define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
|
||||
entry:
|
||||
@ -74,12 +74,12 @@ entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
%t = load i32* %p, !tbaa !3
|
||||
%t = load i32* %p, !tbaa !4
|
||||
store i32 %t, i32* %q
|
||||
ret void
|
||||
|
||||
if.else:
|
||||
%u = load i32* %p, !tbaa !4
|
||||
%u = load i32* %p, !tbaa !3
|
||||
store i32 %u, i32* %q
|
||||
ret void
|
||||
}
|
||||
|
49
test/Transforms/GVN/pre-gep-load.ll
Normal file
49
test/Transforms/GVN/pre-gep-load.ll
Normal file
@ -0,0 +1,49 @@
|
||||
; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
define double @foo(i32 %stat, i32 %i, double** %p) {
|
||||
; CHECK-LABEL: @foo(
|
||||
entry:
|
||||
switch i32 %stat, label %sw.default [
|
||||
i32 0, label %sw.bb
|
||||
i32 1, label %sw.bb
|
||||
i32 2, label %sw.bb2
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %entry, %entry
|
||||
%idxprom = sext i32 %i to i64
|
||||
%arrayidx = getelementptr inbounds double** %p, i64 0
|
||||
%0 = load double** %arrayidx, align 8
|
||||
%arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom
|
||||
%1 = load double* %arrayidx1, align 8
|
||||
%sub = fsub double %1, 1.000000e+00
|
||||
%cmp = fcmp olt double %sub, 0.000000e+00
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %sw.bb
|
||||
br label %return
|
||||
|
||||
if.end: ; preds = %sw.bb
|
||||
br label %sw.bb2
|
||||
|
||||
sw.bb2: ; preds = %if.end, %entry
|
||||
%idxprom3 = sext i32 %i to i64
|
||||
%arrayidx4 = getelementptr inbounds double** %p, i64 0
|
||||
%2 = load double** %arrayidx4, align 8
|
||||
%arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3
|
||||
%3 = load double* %arrayidx5, align 8
|
||||
; CHECK: sw.bb2:
|
||||
; CHECK-NEXT-NOT: sext
|
||||
; CHECK-NEXT: phi double [
|
||||
; CHECK-NOT: load
|
||||
%sub6 = fsub double 3.000000e+00, %3
|
||||
br label %return
|
||||
|
||||
sw.default: ; preds = %entry
|
||||
br label %return
|
||||
|
||||
return: ; preds = %sw.default, %sw.bb2, %if.then
|
||||
%retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
|
||||
ret double %retval.0
|
||||
}
|
Loading…
Reference in New Issue
Block a user