1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

NewGVN: Don't propagate over phi backedges where undef causes us to

have >1 value, unless we can prove the phi node is cycle free.

Fixes PR 32607.

llvm-svn: 300299
This commit is contained in:
Daniel Berlin 2017-04-14 02:53:37 +00:00
parent 9954b1e7ea
commit 584e0fc63c
2 changed files with 182 additions and 8 deletions

View File

@ -63,6 +63,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@ -81,7 +82,6 @@
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/VNCoercion.h"
#include <numeric>
@ -133,6 +133,77 @@ PHIExpression::~PHIExpression() = default;
}
}
// Tarjan's SCC finding algorithm with Nuutila's improvements
// SCCIterator is actually fairly complex for the simple thing we want.
// It also wants to hand us SCC's that are unrelated to the phi node we ask
// about, and have us process them there or risk redoing work.
// Graph traits over a filter iterator also doesn't work that well here.
// This SCC finder is specialized to walk use-def chains, and only follows instructions,
// not generic values (arguments, etc).
struct TarjanSCC {
TarjanSCC() : Components(1) {}
void Start(const Instruction *Start) {
if (Root.lookup(Start) == 0)
FindSCC(Start);
}
const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const {
unsigned ComponentID = ValueToComponent.lookup(V);
assert(ComponentID > 0 &&
"Asking for a component for a value we never processed");
return Components[ComponentID];
}
private:
void FindSCC(const Instruction *I) {
Root[I] = ++DFSNum;
// Store the DFS Number we had before it possibly gets incremented.
unsigned int OurDFS = DFSNum;
for (auto &Op : I->operands()) {
if (auto *InstOp = dyn_cast<Instruction>(Op)) {
if (Root.lookup(Op) == 0)
FindSCC(InstOp);
if (!InComponent.count(Op))
Root[I] = std::min(Root.lookup(I), Root.lookup(Op));
}
}
// See if we really were the root of a component, by seeing if we still have our DFSNumber.
// If we do, we are the root of the component, and we have completed a component. If we do not,
// we are not the root of a component, and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
Components.resize(Components.size() + 1);
auto &Component = Components.back();
Component.insert(I);
DEBUG(dbgs() << "Component root is " << *I << "\n");
InComponent.insert(I);
ValueToComponent[I] = ComponentID;
// Pop a component off the stack and label it.
while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) {
auto *Member = Stack.back();
DEBUG(dbgs() << "Component member is " << *Member << "\n");
Component.insert(Member);
InComponent.insert(Member);
ValueToComponent[Member] = ComponentID;
Stack.pop_back();
}
} else {
// Part of a component, push to stack
Stack.push_back(I);
}
}
unsigned int DFSNum = 1;
SmallPtrSet<const Value *, 8> InComponent;
DenseMap<const Value *, unsigned int> Root;
SmallVector<const Value *, 8> Stack;
// Store the components as vector of ptr sets, because we need the topo order
// of SCC's, but not individual member order
SmallVector<SmallPtrSet<const Value *, 8>, 8> Components;
DenseMap<const Value *, unsigned> ValueToComponent;
};
// Congruence classes represent the set of expressions/instructions
// that are all the same *during some scope in the function*.
// That is, because of the way we perform equality propagation, and
@ -330,10 +401,14 @@ class NewGVN {
std::unique_ptr<PredicateInfo> PredInfo;
BumpPtrAllocator ExpressionAllocator;
ArrayRecycler<Value *> ArgRecycler;
TarjanSCC SCCFinder;
// Number of function arguments, used by ranking
unsigned int NumFuncArgs;
// RPOOrdering of basic blocks
DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
// Congruence class info.
// This class is called INITIAL in the paper. It is the class everything
@ -378,6 +453,8 @@ class NewGVN {
enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
@ -432,7 +509,8 @@ private:
// Expression handling.
const Expression *createExpression(Instruction *);
const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
PHIExpression *createPHIExpression(Instruction *);
PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
bool &AllConstant);
const VariableExpression *createVariableExpression(Value *);
const ConstantExpression *createConstantExpression(Constant *);
const Expression *createVariableOrConstant(Value *V);
@ -572,7 +650,7 @@ private:
? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
: InstrDFS.lookup(MA);
}
bool isCycleFree(const PHINode *PN);
template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
@ -627,7 +705,8 @@ void NewGVN::deleteExpression(const Expression *E) {
ExpressionAllocator.Deallocate(E);
}
PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
bool &AllConstant) {
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E =
@ -637,6 +716,8 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
E->setType(I->getType());
E->setOpcode(I->getOpcode());
unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
// Filter out unreachable phi operands.
auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
@ -644,6 +725,12 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
auto *BB = PN->getIncomingBlock(U);
auto *DTN = DT->getNode(BB);
if (RPOOrdering.lookup(DTN) >= PHIRPO)
HasBackedge = true;
AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
@ -1313,9 +1400,52 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
return Changed;
}
// Determine if a phi is cycle-free. That means the values in the phi don't
// depend on any expressions that can change value as a result of the phi.
// For example, a non-cycle free phi would be v = phi(0, v+1).
bool NewGVN::isCycleFree(const PHINode *PN) {
// In order to compute cycle-freeness, we do SCC finding on the phi, and see
// what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
// If it is not in a singleton, it is only cycle free if the other members are
// all phi nodes (as they do not compute anything, they are copies). TODO:
// There are likely a few other intrinsics or expressions that could be
// included here, but this happens so infrequently already that it is not
// likely to be worth it.
auto PCS = PhiCycleState.lookup(PN);
if (PCS == PCS_Unknown) {
SCCFinder.Start(PN);
auto &SCC = SCCFinder.getComponentFor(PN);
// It's cycle free if it's size 1 or or the SCC is *only* phi nodes.
if (SCC.size() == 1)
PhiCycleState.insert({PN, PCS_CycleFree});
else {
bool AllPhis =
llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
for (auto *Member : SCC)
if (auto *MemberPhi = dyn_cast<PHINode>(Member))
PhiCycleState.insert({MemberPhi, PCS});
}
}
if (PCS == PCS_Cycle)
return false;
return true;
}
// Evaluate PHI nodes symbolically, and create an expression result.
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
auto *E = cast<PHIExpression>(createPHIExpression(I));
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
// were constant.
// This is really shorthand for "this phi cannot cycle due to forward
// propagation", as any
// change in value of the phi is guaranteed not to later change the value of
// the phi.
// IE it can't be v = phi(undef, v+1)
bool AllConstant = true;
auto *E =
cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.
// See if all arguaments are the same.
@ -1337,10 +1467,12 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
deleteExpression(E);
return createConstantExpression(UndefValue::get(I->getType()));
}
unsigned NumOps = 0;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) {
++NumOps;
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
@ -1353,6 +1485,16 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
if (HasUndef) {
// If we have undef and at least one other value, this is really a
// multivalued phi, and we need to know if it's cycle free in order to
// evaluate whether we can ignore the undef. The other parts of this are
// just shortcuts. If there is no backedge, or all operands are
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
!isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
return E;
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
if (!someEquivalentDominates(AllSameInst, I))
@ -2559,7 +2701,6 @@ bool NewGVN::runGVN() {
// The dominator tree does guarantee that, for a given dom tree node, it's
// parent must occur before it in the RPO ordering. Thus, we only need to sort
// the siblings.
DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
ReversePostOrderTraversal<Function *> RPOT(&F);
unsigned Counter = 0;
for (auto &B : RPOT) {
@ -2572,7 +2713,7 @@ bool NewGVN::runGVN() {
auto *Node = DT->getNode(B);
if (Node->getChildren().size() > 1)
std::sort(Node->begin(), Node->end(),
[&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) {
[&](const DomTreeNode *A, const DomTreeNode *B) {
return RPOOrdering[A] < RPOOrdering[B];
});
}

View File

@ -0,0 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -newgvn %s -S -o - | FileCheck %s
define hidden void @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: top:
; CHECK-NEXT: br label [[IF:%.*]]
; CHECK: if:
; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[TMP1:%.*]], [[IF]] ], [ undef, [[TOP:%.*]] ]
; CHECK-NEXT: [[TMP1]] = fadd double [[TMP0]], undef
; CHECK-NEXT: br i1 false, label [[L50:%.*]], label [[IF]]
; CHECK: L50:
; CHECK-NEXT: store i8 undef, i8* null
; CHECK-NEXT: ret void
;
top:
%.promoted = load double, double* undef, align 8
br label %if
;; This is really a multi-valued phi, because the phi is defined by an expression of the phi.
;; This means that we can't propagate the value over the backedge, because we'll just cycle
;; through every value.
if: ; preds = %if, %top
%0 = phi double [ %1, %if ], [ %.promoted, %top ]
%1 = fadd double %0, undef
br i1 false, label %L50, label %if
L50: ; preds = %if
%.lcssa = phi double [ %1, %if ]
store double %.lcssa, double* undef, align 8
ret void
}