mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
ce9aba2e75
The latter is more readable and more efficient. While there clean up some double lookups. NFCI.
512 lines
19 KiB
C++
512 lines
19 KiB
C++
//===- DependenceGraphBuilder.cpp ------------------------------------------==//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// This file implements common steps of the build algorithm for construction
|
|
// of dependence graphs such as DDG and PDG.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Analysis/DependenceGraphBuilder.h"
|
|
#include "llvm/ADT/DepthFirstIterator.h"
|
|
#include "llvm/ADT/EnumeratedArray.h"
|
|
#include "llvm/ADT/SCCIterator.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/Analysis/DDG.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "dgb"
|
|
|
|
STATISTIC(TotalGraphs, "Number of dependence graphs created.");
|
|
STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
|
|
STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
|
|
STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
|
|
STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
|
|
STATISTIC(TotalConfusedEdges,
|
|
"Number of confused memory dependencies between two nodes.");
|
|
STATISTIC(TotalEdgeReversals,
|
|
"Number of times the source and sink of dependence was reversed to "
|
|
"expose cycles in the graph.");
|
|
|
|
using InstructionListType = SmallVector<Instruction *, 2>;
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// AbstractDependenceGraphBuilder implementation
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
template <class G>
|
|
void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
|
|
// The BBList is expected to be in program order.
|
|
size_t NextOrdinal = 1;
|
|
for (auto *BB : BBList)
|
|
for (auto &I : *BB)
|
|
InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
|
|
}
|
|
|
|
template <class G>
|
|
void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
|
|
++TotalGraphs;
|
|
assert(IMap.empty() && "Expected empty instruction map at start");
|
|
for (BasicBlock *BB : BBList)
|
|
for (Instruction &I : *BB) {
|
|
auto &NewNode = createFineGrainedNode(I);
|
|
IMap.insert(std::make_pair(&I, &NewNode));
|
|
NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
|
|
++TotalFineGrainedNodes;
|
|
}
|
|
}
|
|
|
|
template <class G>
|
|
void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
|
|
// Create a root node that connects to every connected component of the graph.
|
|
// This is done to allow graph iterators to visit all the disjoint components
|
|
// of the graph, in a single walk.
|
|
//
|
|
// This algorithm works by going through each node of the graph and for each
|
|
// node N, do a DFS starting from N. A rooted edge is established between the
|
|
// root node and N (if N is not yet visited). All the nodes reachable from N
|
|
// are marked as visited and are skipped in the DFS of subsequent nodes.
|
|
//
|
|
// Note: This algorithm tries to limit the number of edges out of the root
|
|
// node to some extent, but there may be redundant edges created depending on
|
|
// the iteration order. For example for a graph {A -> B}, an edge from the
|
|
// root node is added to both nodes if B is visited before A. While it does
|
|
// not result in minimal number of edges, this approach saves compile-time
|
|
// while keeping the number of edges in check.
|
|
auto &RootNode = createRootNode();
|
|
df_iterator_default_set<const NodeType *, 4> Visited;
|
|
for (auto *N : Graph) {
|
|
if (*N == RootNode)
|
|
continue;
|
|
for (auto I : depth_first_ext(N, Visited))
|
|
if (I == N)
|
|
createRootedEdge(RootNode, *N);
|
|
}
|
|
}
|
|
|
|
template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
|
|
if (!shouldCreatePiBlocks())
|
|
return;
|
|
|
|
LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
|
|
|
|
// The overall algorithm is as follows:
|
|
// 1. Identify SCCs and for each SCC create a pi-block node containing all
|
|
// the nodes in that SCC.
|
|
// 2. Identify incoming edges incident to the nodes inside of the SCC and
|
|
// reconnect them to the pi-block node.
|
|
// 3. Identify outgoing edges from the nodes inside of the SCC to nodes
|
|
// outside of it and reconnect them so that the edges are coming out of the
|
|
// SCC node instead.
|
|
|
|
// Adding nodes as we iterate through the SCCs cause the SCC
|
|
// iterators to get invalidated. To prevent this invalidation, we first
|
|
// collect a list of nodes that are part of an SCC, and then iterate over
|
|
// those lists to create the pi-block nodes. Each element of the list is a
|
|
// list of nodes in an SCC. Note: trivial SCCs containing a single node are
|
|
// ignored.
|
|
SmallVector<NodeListType, 4> ListOfSCCs;
|
|
for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
|
|
if (SCC.size() > 1)
|
|
ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
|
|
}
|
|
|
|
for (NodeListType &NL : ListOfSCCs) {
|
|
LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
|
|
<< " nodes in it.\n");
|
|
|
|
// SCC iterator may put the nodes in an order that's different from the
|
|
// program order. To preserve original program order, we sort the list of
|
|
// nodes based on ordinal numbers computed earlier.
|
|
llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
|
|
return getOrdinal(*LHS) < getOrdinal(*RHS);
|
|
});
|
|
|
|
NodeType &PiNode = createPiBlock(NL);
|
|
++TotalPiBlockNodes;
|
|
|
|
// Build a set to speed up the lookup for edges whose targets
|
|
// are inside the SCC.
|
|
SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
|
|
|
|
// We have the set of nodes in the SCC. We go through the set of nodes
|
|
// that are outside of the SCC and look for edges that cross the two sets.
|
|
for (NodeType *N : Graph) {
|
|
|
|
// Skip the SCC node and all the nodes inside of it.
|
|
if (*N == PiNode || NodesInSCC.count(N))
|
|
continue;
|
|
|
|
for (NodeType *SCCNode : NL) {
|
|
|
|
enum Direction {
|
|
Incoming, // Incoming edges to the SCC
|
|
Outgoing, // Edges going ot of the SCC
|
|
DirectionCount // To make the enum usable as an array index.
|
|
};
|
|
|
|
// Use these flags to help us avoid creating redundant edges. If there
|
|
// are more than one edges from an outside node to inside nodes, we only
|
|
// keep one edge from that node to the pi-block node. Similarly, if
|
|
// there are more than one edges from inside nodes to an outside node,
|
|
// we only keep one edge from the pi-block node to the outside node.
|
|
// There is a flag defined for each direction (incoming vs outgoing) and
|
|
// for each type of edge supported, using a two-dimensional boolean
|
|
// array.
|
|
using EdgeKind = typename EdgeType::EdgeKind;
|
|
EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
|
|
false, false};
|
|
|
|
auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
|
|
const EdgeKind K) {
|
|
switch (K) {
|
|
case EdgeKind::RegisterDefUse:
|
|
createDefUseEdge(Src, Dst);
|
|
break;
|
|
case EdgeKind::MemoryDependence:
|
|
createMemoryEdge(Src, Dst);
|
|
break;
|
|
case EdgeKind::Rooted:
|
|
createRootedEdge(Src, Dst);
|
|
break;
|
|
default:
|
|
llvm_unreachable("Unsupported type of edge.");
|
|
}
|
|
};
|
|
|
|
auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
|
|
const Direction Dir) {
|
|
if (!Src->hasEdgeTo(*Dst))
|
|
return;
|
|
LLVM_DEBUG(dbgs()
|
|
<< "reconnecting("
|
|
<< (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
|
|
<< ":\nSrc:" << *Src << "\nDst:" << *Dst
|
|
<< "\nNew:" << *New << "\n");
|
|
assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
|
|
"Invalid direction.");
|
|
|
|
SmallVector<EdgeType *, 10> EL;
|
|
Src->findEdgesTo(*Dst, EL);
|
|
for (EdgeType *OldEdge : EL) {
|
|
EdgeKind Kind = OldEdge->getKind();
|
|
if (!EdgeAlreadyCreated[Dir][Kind]) {
|
|
if (Dir == Direction::Incoming) {
|
|
createEdgeOfKind(*Src, *New, Kind);
|
|
LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
|
|
} else if (Dir == Direction::Outgoing) {
|
|
createEdgeOfKind(*New, *Dst, Kind);
|
|
LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
|
|
}
|
|
EdgeAlreadyCreated[Dir][Kind] = true;
|
|
}
|
|
Src->removeEdge(*OldEdge);
|
|
destroyEdge(*OldEdge);
|
|
LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
|
|
}
|
|
};
|
|
|
|
// Process incoming edges incident to the pi-block node.
|
|
reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
|
|
|
|
// Process edges that are coming out of the pi-block node.
|
|
reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Ordinal maps are no longer needed.
|
|
InstOrdinalMap.clear();
|
|
NodeOrdinalMap.clear();
|
|
|
|
LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
|
|
}
|
|
|
|
template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
|
|
for (NodeType *N : Graph) {
|
|
InstructionListType SrcIList;
|
|
N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
|
|
|
|
// Use a set to mark the targets that we link to N, so we don't add
|
|
// duplicate def-use edges when more than one instruction in a target node
|
|
// use results of instructions that are contained in N.
|
|
SmallPtrSet<NodeType *, 4> VisitedTargets;
|
|
|
|
for (Instruction *II : SrcIList) {
|
|
for (User *U : II->users()) {
|
|
Instruction *UI = dyn_cast<Instruction>(U);
|
|
if (!UI)
|
|
continue;
|
|
NodeType *DstNode = nullptr;
|
|
if (IMap.find(UI) != IMap.end())
|
|
DstNode = IMap.find(UI)->second;
|
|
|
|
// In the case of loops, the scope of the subgraph is all the
|
|
// basic blocks (and instructions within them) belonging to the loop. We
|
|
// simply ignore all the edges coming from (or going into) instructions
|
|
// or basic blocks outside of this range.
|
|
if (!DstNode) {
|
|
LLVM_DEBUG(
|
|
dbgs()
|
|
<< "skipped def-use edge since the sink" << *UI
|
|
<< " is outside the range of instructions being considered.\n");
|
|
continue;
|
|
}
|
|
|
|
// Self dependencies are ignored because they are redundant and
|
|
// uninteresting.
|
|
if (DstNode == N) {
|
|
LLVM_DEBUG(dbgs()
|
|
<< "skipped def-use edge since the sink and the source ("
|
|
<< N << ") are the same.\n");
|
|
continue;
|
|
}
|
|
|
|
if (VisitedTargets.insert(DstNode).second) {
|
|
createDefUseEdge(*N, *DstNode);
|
|
++TotalDefUseEdges;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
template <class G>
|
|
void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
|
|
using DGIterator = typename G::iterator;
|
|
auto isMemoryAccess = [](const Instruction *I) {
|
|
return I->mayReadOrWriteMemory();
|
|
};
|
|
for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
|
|
InstructionListType SrcIList;
|
|
(*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
|
|
if (SrcIList.empty())
|
|
continue;
|
|
|
|
for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
|
|
if (**SrcIt == **DstIt)
|
|
continue;
|
|
InstructionListType DstIList;
|
|
(*DstIt)->collectInstructions(isMemoryAccess, DstIList);
|
|
if (DstIList.empty())
|
|
continue;
|
|
bool ForwardEdgeCreated = false;
|
|
bool BackwardEdgeCreated = false;
|
|
for (Instruction *ISrc : SrcIList) {
|
|
for (Instruction *IDst : DstIList) {
|
|
auto D = DI.depends(ISrc, IDst, true);
|
|
if (!D)
|
|
continue;
|
|
|
|
// If we have a dependence with its left-most non-'=' direction
|
|
// being '>' we need to reverse the direction of the edge, because
|
|
// the source of the dependence cannot occur after the sink. For
|
|
// confused dependencies, we will create edges in both directions to
|
|
// represent the possibility of a cycle.
|
|
|
|
auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
|
|
if (!ForwardEdgeCreated) {
|
|
createMemoryEdge(Src, Dst);
|
|
++TotalMemoryEdges;
|
|
}
|
|
if (!BackwardEdgeCreated) {
|
|
createMemoryEdge(Dst, Src);
|
|
++TotalMemoryEdges;
|
|
}
|
|
ForwardEdgeCreated = BackwardEdgeCreated = true;
|
|
++TotalConfusedEdges;
|
|
};
|
|
|
|
auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
|
|
if (!ForwardEdgeCreated) {
|
|
createMemoryEdge(Src, Dst);
|
|
++TotalMemoryEdges;
|
|
}
|
|
ForwardEdgeCreated = true;
|
|
};
|
|
|
|
auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
|
|
if (!BackwardEdgeCreated) {
|
|
createMemoryEdge(Dst, Src);
|
|
++TotalMemoryEdges;
|
|
}
|
|
BackwardEdgeCreated = true;
|
|
};
|
|
|
|
if (D->isConfused())
|
|
createConfusedEdges(**SrcIt, **DstIt);
|
|
else if (D->isOrdered() && !D->isLoopIndependent()) {
|
|
bool ReversedEdge = false;
|
|
for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
|
|
if (D->getDirection(Level) == Dependence::DVEntry::EQ)
|
|
continue;
|
|
else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
|
|
createBackwardEdge(**SrcIt, **DstIt);
|
|
ReversedEdge = true;
|
|
++TotalEdgeReversals;
|
|
break;
|
|
} else if (D->getDirection(Level) == Dependence::DVEntry::LT)
|
|
break;
|
|
else {
|
|
createConfusedEdges(**SrcIt, **DstIt);
|
|
break;
|
|
}
|
|
}
|
|
if (!ReversedEdge)
|
|
createForwardEdge(**SrcIt, **DstIt);
|
|
} else
|
|
createForwardEdge(**SrcIt, **DstIt);
|
|
|
|
// Avoid creating duplicate edges.
|
|
if (ForwardEdgeCreated && BackwardEdgeCreated)
|
|
break;
|
|
}
|
|
|
|
// If we've created edges in both directions, there is no more
|
|
// unique edge that we can create between these two nodes, so we
|
|
// can exit early.
|
|
if (ForwardEdgeCreated && BackwardEdgeCreated)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
|
|
if (!shouldSimplify())
|
|
return;
|
|
LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
|
|
|
|
// This algorithm works by first collecting a set of candidate nodes that have
|
|
// an out-degree of one (in terms of def-use edges), and then ignoring those
|
|
// whose targets have an in-degree more than one. Each node in the resulting
|
|
// set can then be merged with its corresponding target and put back into the
|
|
// worklist until no further merge candidates are available.
|
|
SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
|
|
|
|
// A mapping between nodes and their in-degree. To save space, this map
|
|
// only contains nodes that are targets of nodes in the CandidateSourceNodes.
|
|
DenseMap<NodeType *, unsigned> TargetInDegreeMap;
|
|
|
|
for (NodeType *N : Graph) {
|
|
if (N->getEdges().size() != 1)
|
|
continue;
|
|
EdgeType &Edge = N->back();
|
|
if (!Edge.isDefUse())
|
|
continue;
|
|
CandidateSourceNodes.insert(N);
|
|
|
|
// Insert an element into the in-degree map and initialize to zero. The
|
|
// count will get updated in the next step.
|
|
TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
|
|
}
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
|
|
<< "\nNode with single outgoing def-use edge:\n";
|
|
for (NodeType *N : CandidateSourceNodes) {
|
|
dbgs() << N << "\n";
|
|
}
|
|
});
|
|
|
|
for (NodeType *N : Graph) {
|
|
for (EdgeType *E : *N) {
|
|
NodeType *Tgt = &E->getTargetNode();
|
|
auto TgtIT = TargetInDegreeMap.find(Tgt);
|
|
if (TgtIT != TargetInDegreeMap.end())
|
|
++(TgtIT->second);
|
|
}
|
|
}
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
|
|
<< "\nContent of in-degree map:\n";
|
|
for (auto &I : TargetInDegreeMap) {
|
|
dbgs() << I.first << " --> " << I.second << "\n";
|
|
}
|
|
});
|
|
|
|
SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
|
|
CandidateSourceNodes.end());
|
|
while (!Worklist.empty()) {
|
|
NodeType &Src = *Worklist.pop_back_val();
|
|
// As nodes get merged, we need to skip any node that has been removed from
|
|
// the candidate set (see below).
|
|
if (!CandidateSourceNodes.erase(&Src))
|
|
continue;
|
|
|
|
assert(Src.getEdges().size() == 1 &&
|
|
"Expected a single edge from the candidate src node.");
|
|
NodeType &Tgt = Src.back().getTargetNode();
|
|
assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
|
|
"Expected target to be in the in-degree map.");
|
|
|
|
if (TargetInDegreeMap[&Tgt] != 1)
|
|
continue;
|
|
|
|
if (!areNodesMergeable(Src, Tgt))
|
|
continue;
|
|
|
|
// Do not merge if there is also an edge from target to src (immediate
|
|
// cycle).
|
|
if (Tgt.hasEdgeTo(Src))
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
|
|
|
|
mergeNodes(Src, Tgt);
|
|
|
|
// If the target node is in the candidate set itself, we need to put the
|
|
// src node back into the worklist again so it gives the target a chance
|
|
// to get merged into it. For example if we have:
|
|
// {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
|
|
// then after merging (a) and (b) together, we need to put (a,b) back in
|
|
// the worklist so that (c) can get merged in as well resulting in
|
|
// {(a,b,c) -> d}
|
|
// We also need to remove the old target (b), from the worklist. We first
|
|
// remove it from the candidate set here, and skip any item from the
|
|
// worklist that is not in the set.
|
|
if (CandidateSourceNodes.erase(&Tgt)) {
|
|
Worklist.push_back(&Src);
|
|
CandidateSourceNodes.insert(&Src);
|
|
LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
|
|
}
|
|
}
|
|
LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
|
|
}
|
|
|
|
template <class G>
|
|
void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
|
|
|
|
// If we don't create pi-blocks, then we may not have a DAG.
|
|
if (!shouldCreatePiBlocks())
|
|
return;
|
|
|
|
SmallVector<NodeType *, 64> NodesInPO;
|
|
using NodeKind = typename NodeType::NodeKind;
|
|
for (NodeType *N : post_order(&Graph)) {
|
|
if (N->getKind() == NodeKind::PiBlock) {
|
|
// Put members of the pi-block right after the pi-block itself, for
|
|
// convenience.
|
|
const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
|
|
NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
|
|
PiBlockMembers.end());
|
|
}
|
|
NodesInPO.push_back(N);
|
|
}
|
|
|
|
size_t OldSize = Graph.Nodes.size();
|
|
Graph.Nodes.clear();
|
|
for (NodeType *N : reverse(NodesInPO))
|
|
Graph.Nodes.push_back(N);
|
|
if (Graph.Nodes.size() != OldSize)
|
|
assert(false &&
|
|
"Expected the number of nodes to stay the same after the sort");
|
|
}
|
|
|
|
template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
|
|
template class llvm::DependenceGraphInfo<DDGNode>;
|