mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
Teach the latency scheduler some new tricks. In particular, to break ties,
keep track of a sense of "mobility", i.e. how many other nodes scheduling one node will free up. For example, given something like this:

    float testadd(float *X, float *Y, float *Z, float *W, float *V) {
      return (*X+*Y)*(*Z+*W)+*V;
    }

this makes us schedule *X then *Y, not *X then *Z. The former allows us to issue the add, the latter only lets us issue other loads. This turns the above code from this:

    _testadd:
            lfs f0, 0(r3)
            lfs f1, 0(r6)
            lfs f2, 0(r4)
            lfs f3, 0(r5)
            fadds f0, f0, f2
            fadds f1, f3, f1
            lfs f2, 0(r7)
            fmadds f1, f0, f1, f2
            blr

into this:

    _testadd:
            lfs f0, 0(r6)
            lfs f1, 0(r5)
            fadds f0, f1, f0
            lfs f1, 0(r4)
            lfs f2, 0(r3)
            fadds f1, f2, f1
            lfs f2, 0(r7)
            fmadds f1, f1, f0, f2
            blr

llvm-svn: 26680
This commit is contained in:
parent
3fca15ca78
commit
920325db0a
@ -51,6 +51,8 @@ namespace {
|
|||||||
short NumChainSuccsLeft; // # of chain succs not scheduled.
|
short NumChainSuccsLeft; // # of chain succs not scheduled.
|
||||||
bool isTwoAddress : 1; // Is a two-address instruction.
|
bool isTwoAddress : 1; // Is a two-address instruction.
|
||||||
bool isDefNUseOperand : 1; // Is a def&use operand.
|
bool isDefNUseOperand : 1; // Is a def&use operand.
|
||||||
|
bool isAvailable : 1; // True once available.
|
||||||
|
bool isScheduled : 1; // True once scheduled.
|
||||||
unsigned short Latency; // Node latency.
|
unsigned short Latency; // Node latency.
|
||||||
unsigned CycleBound; // Upper/lower cycle to be scheduled at.
|
unsigned CycleBound; // Upper/lower cycle to be scheduled at.
|
||||||
unsigned NodeNum; // Entry # of node in the node vector.
|
unsigned NodeNum; // Entry # of node in the node vector.
|
||||||
@ -59,6 +61,7 @@ namespace {
|
|||||||
: Node(node), NumPredsLeft(0), NumSuccsLeft(0),
|
: Node(node), NumPredsLeft(0), NumSuccsLeft(0),
|
||||||
NumChainPredsLeft(0), NumChainSuccsLeft(0),
|
NumChainPredsLeft(0), NumChainSuccsLeft(0),
|
||||||
isTwoAddress(false), isDefNUseOperand(false),
|
isTwoAddress(false), isDefNUseOperand(false),
|
||||||
|
isAvailable(false), isScheduled(false),
|
||||||
Latency(0), CycleBound(0), NodeNum(nodenum) {}
|
Latency(0), CycleBound(0), NodeNum(nodenum) {}
|
||||||
|
|
||||||
void dump(const SelectionDAG *G) const;
|
void dump(const SelectionDAG *G) const;
|
||||||
@ -247,9 +250,11 @@ void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain) {
|
|||||||
|
|
||||||
if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
|
if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
|
||||||
// EntryToken has to go last! Special case it here.
|
// EntryToken has to go last! Special case it here.
|
||||||
if (PredSU->Node->getOpcode() != ISD::EntryToken)
|
if (PredSU->Node->getOpcode() != ISD::EntryToken) {
|
||||||
|
PredSU->isAvailable = true;
|
||||||
PriorityQueue->push(PredSU);
|
PriorityQueue->push(PredSU);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
|
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
|
||||||
@ -275,8 +280,10 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0)
|
if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
|
||||||
|
SuccSU->isAvailable = true;
|
||||||
PriorityQueue->push(SuccSU);
|
PriorityQueue->push(SuccSU);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
|
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
|
||||||
@ -350,8 +357,9 @@ void ScheduleDAGList::ListScheduleBottomUp() {
|
|||||||
PriorityQueue->push_all(NotReady);
|
PriorityQueue->push_all(NotReady);
|
||||||
NotReady.clear();
|
NotReady.clear();
|
||||||
|
|
||||||
PriorityQueue->ScheduledNode(CurrNode);
|
|
||||||
ScheduleNodeBottomUp(CurrNode);
|
ScheduleNodeBottomUp(CurrNode);
|
||||||
|
CurrNode->isScheduled = true;
|
||||||
|
PriorityQueue->ScheduledNode(CurrNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add entry node last
|
// Add entry node last
|
||||||
@ -432,9 +440,10 @@ void ScheduleDAGList::ListScheduleTopDown() {
|
|||||||
|
|
||||||
// If we found a node to schedule, do it now.
|
// If we found a node to schedule, do it now.
|
||||||
if (FoundNode) {
|
if (FoundNode) {
|
||||||
PriorityQueue->ScheduledNode(FoundNode);
|
|
||||||
ScheduleNodeTopDown(FoundNode);
|
ScheduleNodeTopDown(FoundNode);
|
||||||
HazardRec->EmitInstruction(FoundNode->Node);
|
HazardRec->EmitInstruction(FoundNode->Node);
|
||||||
|
FoundNode->isScheduled = true;
|
||||||
|
PriorityQueue->ScheduledNode(FoundNode);
|
||||||
} else if (!HasNoopHazards) {
|
} else if (!HasNoopHazards) {
|
||||||
// Otherwise, we have a pipeline stall, but no other problem, just advance
|
// Otherwise, we have a pipeline stall, but no other problem, just advance
|
||||||
// the current cycle and try again.
|
// the current cycle and try again.
|
||||||
@ -828,6 +837,12 @@ namespace {
|
|||||||
// for each node.
|
// for each node.
|
||||||
std::vector<int> Latencies;
|
std::vector<int> Latencies;
|
||||||
|
|
||||||
|
/// NumNodesSolelyBlocking - This vector contains, for every node in the
|
||||||
|
/// Queue, the number of nodes that the node is the sole unscheduled
|
||||||
|
/// predecessor for. This is used as a tie-breaker heuristic for better
|
||||||
|
/// mobility.
|
||||||
|
std::vector<unsigned> NumNodesSolelyBlocking;
|
||||||
|
|
||||||
std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
|
std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
|
||||||
public:
|
public:
|
||||||
LatencyPriorityQueue() : Queue(latency_sort(this)) {
|
LatencyPriorityQueue() : Queue(latency_sort(this)) {
|
||||||
@ -848,14 +863,21 @@ public:
|
|||||||
return Latencies[NodeNum];
|
return Latencies[NodeNum];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getNumSolelyBlockNodes - Return the NumNodesSolelyBlocking entry recorded
/// for the node with the given number.  NodeNum must be a valid index into
/// that vector.
unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
  const std::vector<unsigned> &Blocking = NumNodesSolelyBlocking;
  assert(NodeNum < Blocking.size());
  return Blocking[NodeNum];
}
|
||||||
|
|
||||||
bool empty() const { return Queue.empty(); }
|
bool empty() const { return Queue.empty(); }
|
||||||
|
|
||||||
void push(SUnit *U) {
|
virtual void push(SUnit *U) {
|
||||||
Queue.push(U);
|
push_impl(U);
|
||||||
}
|
}
|
||||||
|
void push_impl(SUnit *U);
|
||||||
|
|
||||||
void push_all(const std::vector<SUnit *> &Nodes) {
|
void push_all(const std::vector<SUnit *> &Nodes) {
|
||||||
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
|
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
|
||||||
Queue.push(Nodes[i]);
|
push_impl(Nodes[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
SUnit *pop() {
|
SUnit *pop() {
|
||||||
@ -863,9 +885,38 @@ public:
|
|||||||
Queue.pop();
|
Queue.pop();
|
||||||
return V;
|
return V;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ScheduledNode - As nodes are scheduled, we look to see if there are any
|
||||||
|
// successor nodes that have a single unscheduled predecessor. If so, that
|
||||||
|
// single predecessor has a higher priority, since scheduling it will make
|
||||||
|
// the node available.
|
||||||
|
void ScheduledNode(SUnit *Node);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void CalculatePriorities();
|
void CalculatePriorities();
|
||||||
int CalcLatency(const SUnit &SU);
|
int CalcLatency(const SUnit &SU);
|
||||||
|
void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
|
||||||
|
|
||||||
|
/// RemoveFromPriorityQueue - This is a really inefficient way to remove a
|
||||||
|
/// node from a priority queue. We should roll our own heap to make this
|
||||||
|
/// better or something.
|
||||||
|
void RemoveFromPriorityQueue(SUnit *SU) {
|
||||||
|
std::vector<SUnit*> Temp;
|
||||||
|
|
||||||
|
assert(!Queue.empty() && "Not in queue!");
|
||||||
|
while (Queue.top() != SU) {
|
||||||
|
Temp.push_back(Queue.top());
|
||||||
|
Queue.pop();
|
||||||
|
assert(!Queue.empty() && "Not in queue!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove the node from the PQ.
|
||||||
|
Queue.pop();
|
||||||
|
|
||||||
|
// Add all the other nodes back.
|
||||||
|
for (unsigned i = 0, e = Temp.size(); i != e; ++i)
|
||||||
|
Queue.push(Temp[i]);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -873,7 +924,22 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
|
|||||||
unsigned LHSNum = LHS->NodeNum;
|
unsigned LHSNum = LHS->NodeNum;
|
||||||
unsigned RHSNum = RHS->NodeNum;
|
unsigned RHSNum = RHS->NodeNum;
|
||||||
|
|
||||||
return PQ->getLatency(LHSNum) < PQ->getLatency(RHSNum);
|
// The most important heuristic is scheduling the critical path.
|
||||||
|
unsigned LHSLatency = PQ->getLatency(LHSNum);
|
||||||
|
unsigned RHSLatency = PQ->getLatency(RHSNum);
|
||||||
|
if (LHSLatency < RHSLatency) return true;
|
||||||
|
if (LHSLatency > RHSLatency) return false;
|
||||||
|
|
||||||
|
// After that, if two nodes have identical latencies, look to see if one will
|
||||||
|
// unblock more other nodes than the other.
|
||||||
|
unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
|
||||||
|
unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
|
||||||
|
if (LHSBlocked < RHSBlocked) return true;
|
||||||
|
if (LHSBlocked > RHSBlocked) return false;
|
||||||
|
|
||||||
|
// Finally, just to provide a stable ordering, use the node number as a
|
||||||
|
// deciding factor.
|
||||||
|
return LHSNum < RHSNum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -899,11 +965,92 @@ int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
|
|||||||
/// CalculatePriorities - Calculate priorities of all scheduling units.
|
/// CalculatePriorities - Calculate priorities of all scheduling units.
|
||||||
void LatencyPriorityQueue::CalculatePriorities() {
|
void LatencyPriorityQueue::CalculatePriorities() {
|
||||||
Latencies.assign(SUnits->size(), -1);
|
Latencies.assign(SUnits->size(), -1);
|
||||||
|
NumNodesSolelyBlocking.assign(SUnits->size(), 0);
|
||||||
|
|
||||||
for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
|
for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
|
||||||
CalcLatency((*SUnits)[i]);
|
CalcLatency((*SUnits)[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
|
||||||
|
/// of SU, return it, otherwise return null.
|
||||||
|
static SUnit *getSingleUnscheduledPred(SUnit *SU) {
|
||||||
|
SUnit *OnlyAvailablePred = 0;
|
||||||
|
for (std::set<SUnit*>::const_iterator I = SU->Preds.begin(),
|
||||||
|
E = SU->Preds.end(); I != E; ++I)
|
||||||
|
if (!(*I)->isScheduled) {
|
||||||
|
// We found an available, but not scheduled, predecessor. If it's the
|
||||||
|
// only one we have found, keep track of it... otherwise give up.
|
||||||
|
if (OnlyAvailablePred && OnlyAvailablePred != *I)
|
||||||
|
return 0;
|
||||||
|
OnlyAvailablePred = *I;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (std::set<SUnit*>::const_iterator I = SU->ChainSuccs.begin(),
|
||||||
|
E = SU->ChainSuccs.end(); I != E; ++I)
|
||||||
|
if (!(*I)->isScheduled) {
|
||||||
|
// We found an available, but not scheduled, predecessor. If it's the
|
||||||
|
// only one we have found, keep track of it... otherwise give up.
|
||||||
|
if (OnlyAvailablePred && OnlyAvailablePred != *I)
|
||||||
|
return 0;
|
||||||
|
OnlyAvailablePred = *I;
|
||||||
|
}
|
||||||
|
|
||||||
|
return OnlyAvailablePred;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// push_impl - Insert SU into the priority queue.  Before inserting, count
/// how many successors (data and chain) have SU as their only unscheduled
/// predecessor and record that count in NumNodesSolelyBlocking, where
/// getNumSolelyBlockNodes reads it back.
void LatencyPriorityQueue::push_impl(SUnit *SU) {
  // Look at all of the successors of this node.  Count the number of nodes that
  // this node is the sole unscheduled node for.
  unsigned NumNodesBlocking = 0;
  for (std::set<SUnit*>::const_iterator I = SU->Succs.begin(),
       E = SU->Succs.end(); I != E; ++I)
    if (getSingleUnscheduledPred(*I) == SU)
      ++NumNodesBlocking;

  for (std::set<SUnit*>::const_iterator I = SU->ChainSuccs.begin(),
       E = SU->ChainSuccs.end(); I != E; ++I)
    if (getSingleUnscheduledPred(*I) == SU)
      ++NumNodesBlocking;

  // Record the count *before* the node enters the heap.  Without this store
  // the vector keeps the zeros it was initialized with and the count computed
  // above is thrown away.
  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;

  Queue.push(SU);
}
|
||||||
|
|
||||||
|
|
||||||
|
// ScheduledNode - As nodes are scheduled, we look to see if there are any
|
||||||
|
// successor nodes that have a single unscheduled predecessor. If so, that
|
||||||
|
// single predecessor has a higher priority, since scheduling it will make
|
||||||
|
// the node available.
|
||||||
|
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
|
||||||
|
for (std::set<SUnit*>::const_iterator I = SU->Succs.begin(),
|
||||||
|
E = SU->Succs.end(); I != E; ++I)
|
||||||
|
AdjustPriorityOfUnscheduledPreds(*I);
|
||||||
|
|
||||||
|
for (std::set<SUnit*>::const_iterator I = SU->ChainSuccs.begin(),
|
||||||
|
E = SU->ChainSuccs.end(); I != E; ++I)
|
||||||
|
AdjustPriorityOfUnscheduledPreds(*I);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
|
||||||
|
/// scheduled. If SU is not itself available, then there is at least one
|
||||||
|
/// predecessor node that has not been scheduled yet. If SU has exactly ONE
|
||||||
|
/// unscheduled predecessor, we want to increase its priority: it getting
|
||||||
|
/// scheduled will make this node available, so it is better than some other
|
||||||
|
/// node of the same priority that will not make a node available.
|
||||||
|
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
|
||||||
|
if (SU->isAvailable) return; // All preds scheduled.
|
||||||
|
|
||||||
|
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
|
||||||
|
if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
|
||||||
|
|
||||||
|
// Okay, we found a single predecessor that is available, but not scheduled.
|
||||||
|
// Since it is available, it must be in the priority queue. First remove it.
|
||||||
|
RemoveFromPriorityQueue(OnlyAvailablePred);
|
||||||
|
|
||||||
|
// Reinsert the node into the priority queue, which recomputes its
|
||||||
|
// NumNodesSolelyBlocking value.
|
||||||
|
push(OnlyAvailablePred);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Public Constructor Functions
|
// Public Constructor Functions
|
||||||
|
Loading…
Reference in New Issue
Block a user