1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

Reapply r263460: [SpillPlacement] Fix a quadratic behavior in spill placement.

Using Chandler's words from r265331:
This commit was greatly exacerbating PR17409 and effectively regressed
build time for a lot of (very large) code when compiled with ASan or MSan.

PR17409 is fixed by r269249, so it is fine to reapply r263460.

Original commit message:
The bad behavior happens when we have a function with a long linear
chain of basic blocks, and have a live range spanning most of this
chain, but with very few uses.

Let's say we have only 2 uses.

The Hopfield network is only seeded with two active blocks where the
uses are, and each iteration of the outer loop in
`RAGreedy::growRegion()` only adds two new nodes to the network due to
the completely linear shape of the CFG.  Meanwhile,
`SpillPlacer->iterate()` visits the whole set of discovered nodes, which
adds up to a quadratic algorithm.

This is an accidental, historical side effect of r129188.

When the Hopfield network is expanding, most of the action is happening
on the frontier where new nodes are being added. The internal nodes in
the network are not likely to be flip-flopping much, or they will at
least settle down very quickly. This means that while
`SpillPlacer->iterate()` is recomputing all the nodes in the network, it
is probably only the two frontier nodes that are changing their output.

Instead of recomputing the whole network on each iteration, we can
maintain a SparseSet of nodes that need to be updated:

- `SpillPlacement::activate()` adds the node to the todo list.
- When a node changes value (i.e., `update()` returns true), its
  neighbors are added to the todo list.
- `SpillPlacement::iterate()` only updates the nodes in the list.

The result of Hopfield iterations is not necessarily exact. It should
converge to a local minimum, but there is no guarantee that it will find
a global minimum. It is possible that updating nodes in a different
order will cause us to switch to a different local minimum. In other
words, this is not NFC, but although I saw a few runtime improvements
and regressions when I benchmarked this change, those were side effects
and actually the performance change is in the noise as expected.

Huge thanks to Jakob Stoklund Olesen <stoklund@2pi.dk> for his
feedback, guidance and time on the review.

llvm-svn: 270149
This commit is contained in:
Quentin Colombet 2016-05-19 22:40:37 +00:00
parent 62b7ba5ca2
commit 970400db38
2 changed files with 44 additions and 53 deletions

View File

@ -173,6 +173,17 @@ struct SpillPlacement::Node {
Value = 0; Value = 0;
return Before != preferReg(); return Before != preferReg();
} }
void getDissentingNeighbors(SparseSet<unsigned> &List,
const Node nodes[]) const {
for (const auto &Elt : Links) {
unsigned n = Elt.second;
// Neighbors that already have the same value are not going to
// change because of this node changing.
if (Value != nodes[n].Value)
List.insert(n);
}
}
}; };
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
@ -182,6 +193,8 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
assert(!nodes && "Leaking node array"); assert(!nodes && "Leaking node array");
nodes = new Node[bundles->getNumBundles()]; nodes = new Node[bundles->getNumBundles()];
TodoList.clear();
TodoList.setUniverse(bundles->getNumBundles());
// Compute total ingoing and outgoing block frequencies for all bundles. // Compute total ingoing and outgoing block frequencies for all bundles.
BlockFrequencies.resize(mf.getNumBlockIDs()); BlockFrequencies.resize(mf.getNumBlockIDs());
@ -199,10 +212,12 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
void SpillPlacement::releaseMemory() { void SpillPlacement::releaseMemory() {
delete[] nodes; delete[] nodes;
nodes = nullptr; nodes = nullptr;
TodoList.clear();
} }
/// activate - mark node n as active if it wasn't already. /// activate - mark node n as active if it wasn't already.
void SpillPlacement::activate(unsigned n) { void SpillPlacement::activate(unsigned n) {
TodoList.insert(n);
if (ActiveNodes->test(n)) if (ActiveNodes->test(n))
return; return;
ActiveNodes->set(n); ActiveNodes->set(n);
@ -287,10 +302,6 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
continue; continue;
activate(ib); activate(ib);
activate(ob); activate(ob);
if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
Linked.push_back(ib);
if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
Linked.push_back(ob);
BlockFrequency Freq = BlockFrequencies[Number]; BlockFrequency Freq = BlockFrequencies[Number];
nodes[ib].addLink(ob, Freq); nodes[ib].addLink(ob, Freq);
nodes[ob].addLink(ib, Freq); nodes[ob].addLink(ib, Freq);
@ -298,76 +309,50 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
} }
bool SpillPlacement::scanActiveBundles() { bool SpillPlacement::scanActiveBundles() {
Linked.clear();
RecentPositive.clear(); RecentPositive.clear();
for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
nodes[n].update(nodes, Threshold); update(n);
// A node that must spill, or a node without any links is not going to // A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations. // change its value ever again, so exclude it from iterations.
if (nodes[n].mustSpill()) if (nodes[n].mustSpill())
continue; continue;
if (!nodes[n].Links.empty())
Linked.push_back(n);
if (nodes[n].preferReg()) if (nodes[n].preferReg())
RecentPositive.push_back(n); RecentPositive.push_back(n);
} }
return !RecentPositive.empty(); return !RecentPositive.empty();
} }
bool SpillPlacement::update(unsigned n) {
if (!nodes[n].update(nodes, Threshold))
return false;
nodes[n].getDissentingNeighbors(TodoList, nodes);
return true;
}
/// iterate - Repeatedly update the Hopfield nodes until stability or the /// iterate - Repeatedly update the Hopfield nodes until stability or the
/// maximum number of iterations is reached. /// maximum number of iterations is reached.
/// @param Linked - Numbers of linked nodes that need updating.
void SpillPlacement::iterate() { void SpillPlacement::iterate() {
// First update the recently positive nodes. They have likely received new // We do not need to push those nodes in the todolist.
// negative bias that will turn them off. // They have already been processed as part of the previous iteration.
while (!RecentPositive.empty()) RecentPositive.clear();
nodes[RecentPositive.pop_back_val()].update(nodes, Threshold);
if (Linked.empty()) // Since the last iteration, the todolist has been augmented by calls
return; // to addConstraints, addLinks, and co.
// Update the network energy starting at this new frontier.
// Run up to 10 iterations. The edge bundle numbering is closely related to // The call to ::update will add the nodes that changed into the todolist.
// basic block numbering, so there is a strong tendency towards chains of unsigned Limit = bundles->getNumBundles() * 10;
// linked nodes with sequential numbers. By scanning the linked nodes while(Limit-- > 0 && !TodoList.empty()) {
// backwards and forwards, we make it very likely that a single node can unsigned n = TodoList.pop_back_val();
// affect the entire network in a single iteration. That means very fast if (!update(n))
// convergence, usually in a single iteration. continue;
for (unsigned iteration = 0; iteration != 10; ++iteration) {
// Scan backwards, skipping the last node when iteration is not zero. When
// iteration is not zero, the last node was just updated.
bool Changed = false;
for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
E = Linked.rend(); I != E; ++I) {
unsigned n = *I;
if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg()) if (nodes[n].preferReg())
RecentPositive.push_back(n); RecentPositive.push_back(n);
} }
}
if (!Changed || !RecentPositive.empty())
return;
// Scan forwards, skipping the first node which was just updated.
Changed = false;
for (SmallVectorImpl<unsigned>::const_iterator I =
std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
unsigned n = *I;
if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg())
RecentPositive.push_back(n);
}
}
if (!Changed || !RecentPositive.empty())
return;
}
} }
void SpillPlacement::prepare(BitVector &RegBundles) { void SpillPlacement::prepare(BitVector &RegBundles) {
Linked.clear();
RecentPositive.clear(); RecentPositive.clear();
TodoList.clear();
// Reuse RegBundles as our ActiveNodes vector. // Reuse RegBundles as our ActiveNodes vector.
ActiveNodes = &RegBundles; ActiveNodes = &RegBundles;
ActiveNodes->clear(); ActiveNodes->clear();

View File

@ -29,6 +29,7 @@
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BlockFrequency.h"
@ -66,6 +67,9 @@ class SpillPlacement : public MachineFunctionPass {
/// its inputs falls in the open interval (-Threshold;Threshold). /// its inputs falls in the open interval (-Threshold;Threshold).
BlockFrequency Threshold; BlockFrequency Threshold;
/// List of nodes that need to be updated in ::iterate.
SparseSet<unsigned> TodoList;
public: public:
static char ID; // Pass identification, replacement for typeid. static char ID; // Pass identification, replacement for typeid.
@ -157,6 +161,8 @@ private:
void activate(unsigned); void activate(unsigned);
void setThreshold(const BlockFrequency &Entry); void setThreshold(const BlockFrequency &Entry);
bool update(unsigned);
}; };
} // end namespace llvm } // end namespace llvm