1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

Bug fixes:

(1) Ensure that delay slot instructions are not moved out of place (this
    was happening for some CALL instructions).  Basically, we need to
    move all delay slot instructions out of the graph and handle them
    along with the delayed control transfer instruction.
(2) Mark scheduled instructions correctly when instructions are scheduled
    in more than one cycle in a single step (due to delay slots).

llvm-svn: 678
This commit is contained in:
Vikram S. Adve 2001-09-30 23:43:34 +00:00
parent d376407048
commit 180dee14b8

View File

@ -9,16 +9,26 @@
// 7/23/01 - Vikram Adve - Created
//**************************************************************************/
//************************* User Include Files *****************************/
#include "llvm/CodeGen/InstrScheduling.h"
#include "SchedPriorities.h"
#include "llvm/Analysis/LiveVar/BBLiveVar.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Instruction.h"
//************************ System Include Files *****************************/
#include <hash_set>
#include <algorithm>
#include <iterator>
//************************* External Data Types *****************************/
cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
"enable instruction scheduling debugging information",
clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"),
@ -27,55 +37,12 @@ cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
//************************* Internal Data Types *****************************/
class InstrSchedule;
class SchedulingManager;
class DelaySlotInfo;
static void ForwardListSchedule (SchedulingManager& S);
static void RecordSchedule (const BasicBlock* bb,
const SchedulingManager& S);
static unsigned ChooseOneGroup (SchedulingManager& S);
static void MarkSuccessorsReady (SchedulingManager& S,
const SchedGraphNode* node);
static unsigned FindSlotChoices (SchedulingManager& S,
DelaySlotInfo*& getDelaySlotInfo);
static void AssignInstructionsToSlots(class SchedulingManager& S,
unsigned maxIssue);
static void ScheduleInstr (class SchedulingManager& S,
const SchedGraphNode* node,
unsigned int slotNum,
cycles_t curTime);
static bool ViolatesMinimumGap (const SchedulingManager& S,
MachineOpCode opCode,
const cycles_t inCycle);
static bool ConflictsWithChoices (const SchedulingManager& S,
MachineOpCode opCode);
static void ChooseInstructionsForDelaySlots(SchedulingManager& S,
const BasicBlock* bb,
SchedGraph* graph);
static bool NodeCanFillDelaySlot (const SchedulingManager& S,
const SchedGraphNode* node,
const SchedGraphNode* brNode,
bool nodeIsPredecessor);
static void MarkNodeForDelaySlot (SchedulingManager& S,
SchedGraph* graph,
SchedGraphNode* node,
const SchedGraphNode* brNode,
bool nodeIsPredecessor);
//************************* Internal Data Types *****************************/
//----------------------------------------------------------------------
// class InstrGroup:
@ -369,7 +336,7 @@ public:
delayedNodeSlotNum = slotNum;
}
void scheduleDelayedNode (SchedulingManager& S);
unsigned scheduleDelayedNode (SchedulingManager& S);
};
@ -539,7 +506,7 @@ public:
if (createIfMissing)
{
dinfo = new DelaySlotInfo(bn,
getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
delaySlotInfoForBranches[bn] = dinfo;
}
else
@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
}
}
//************************* External Functions *****************************/
//---------------------------------------------------------------------------
// Function: ScheduleInstructionsWithSSA
//
// Purpose:
// Entry point for instruction scheduling on SSA form.
// Schedules the machine instructions generated by instruction selection.
// Assumes that register allocation has not been done, i.e., operands
// are still in SSA form.
//---------------------------------------------------------------------------
bool
ScheduleInstructionsWithSSA(Method* method,
const TargetMachine &target)
{
SchedGraphSet graphSet(method, target);
if (SchedDebugLevel >= Sched_PrintSchedGraphs)
{
cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
<< endl;
graphSet.dump();
}
for (SchedGraphSet::const_iterator GI=graphSet.begin();
GI != graphSet.end(); ++GI)
{
SchedGraph* graph = (*GI).second;
const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
const BasicBlock* bb = bbvec[0];
if (SchedDebugLevel >= Sched_PrintSchedTrace)
cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
SchedPriorities schedPrio(method, graph); // expensive!
SchedulingManager S(target, graph, schedPrio);
ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
ForwardListSchedule(S); // computes schedule in S
RecordSchedule((*GI).first, S); // records schedule in BB
}
if (SchedDebugLevel >= Sched_PrintMachineCode)
{
cout << endl
<< "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
PrintMachineInstructions(method);
}
return false; // no reason to fail yet
}
// Check minimum gap requirements relative to instructions scheduled in
// previous cycles.
// Note that we do not need to consider `nextEarliestIssueTime' here because
// that is also captured in the earliest start times for each opcode.
//
static inline bool
ViolatesMinimumGap(const SchedulingManager& S,
MachineOpCode opCode,
const cycles_t inCycle)
{
return (inCycle < S.getEarliestStartTimeForOp(opCode));
}
// Check if the instruction would conflict with instructions already
// chosen for the current cycle
//
static inline bool
ConflictsWithChoices(const SchedulingManager& S,
MachineOpCode opCode)
{
// Check if the instruction must issue by itself, and some feasible
// choices have already been made for this cycle
if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
return true;
// For each class that opCode belongs to, check if there are too many
// instructions of that class.
//
const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
}
// Check if any issue restrictions would prevent the instruction from
// being issued in the current cycle
//
bool
instrIsFeasible(const SchedulingManager& S,
MachineOpCode opCode)
{
// skip the instruction if it cannot be issued due to issue restrictions
// caused by previously issued instructions
if (ViolatesMinimumGap(S, opCode, S.getTime()))
return false;
// skip the instruction if it cannot be issued due to issue restrictions
// caused by previously chosen instructions for the current cycle
if (ConflictsWithChoices(S, opCode))
return false;
return true;
}
//************************* Internal Functions *****************************/
static void
ForwardListSchedule(SchedulingManager& S)
AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
{
unsigned N;
const SchedGraphNode* node;
// find the slot to start from, in the current cycle
unsigned int startSlot = 0;
cycles_t curTime = S.getTime();
S.schedPrio.initialize();
assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
while ((N = S.schedPrio.getNumReady()) > 0)
// If only one instruction can be issued, do so.
if (maxIssue == 1)
for (unsigned s=startSlot; s < S.nslots; s++)
if (S.getChoicesForSlot(s).size() > 0)
{// found the one instruction
S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
return;
}
// Otherwise, choose from the choices for each slot
//
InstrGroup* igroup = S.isched.getIGroup(S.getTime());
assert(igroup != NULL && "Group creation failed?");
// Find a slot that has only a single choice, and take it.
// If all slots have 0 or multiple choices, pick the first slot with
// choices and use its last instruction (just to avoid shifting the vector).
unsigned numIssued;
for (numIssued = 0; numIssued < maxIssue; numIssued++)
{
// Choose one group of instructions for a cycle. This will
// advance S.getTime() to the first cycle instructions can be issued.
// It may also schedule delay slot instructions in later cycles,
// but those are ignored here because they are outside the graph.
//
unsigned numIssued = ChooseOneGroup(S);
assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
// Notify the priority manager of scheduled instructions and mark
// any successors that may now be ready
//
const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
for (unsigned int s=0; s < S.nslots; s++)
if ((node = (*igroup)[s]) != NULL)
int chosenSlot = -1, chosenNodeIndex = -1;
for (unsigned s=startSlot; s < S.nslots; s++)
if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
{
S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
MarkSuccessorsReady(S, node);
chosenSlot = (int) s;
break;
}
// Move to the next the next earliest cycle for which
// an instruction can be issued, or the next earliest in which
// one will be ready, or to the next cycle, whichever is latest.
//
S.updateTime(max(S.getTime() + 1,
max(S.getEarliestIssueTime(),
S.schedPrio.getEarliestReadyTime())));
if (chosenSlot == -1)
for (unsigned s=startSlot; s < S.nslots; s++)
if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
{
chosenSlot = (int) s;
break;
}
if (chosenSlot != -1)
{ // Insert the chosen instr in the chosen slot and
// erase it from all slots.
const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
S.scheduleInstr(node, chosenSlot, curTime);
}
}
assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
}
@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
}
static unsigned
ChooseOneGroup(SchedulingManager& S)
{
assert(S.schedPrio.getNumReady() > 0
&& "Don't get here without ready instructions.");
DelaySlotInfo* getDelaySlotInfo = NULL;
// Choose up to `nslots' feasible instructions and their possible slots.
unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
while (numIssued == 0)
{
S.updateTime(S.getTime()+1);
numIssued = FindSlotChoices(S, getDelaySlotInfo);
}
AssignInstructionsToSlots(S, numIssued);
if (getDelaySlotInfo != NULL)
getDelaySlotInfo->scheduleDelayedNode(S);
// Print trace of scheduled instructions before newly ready ones
if (SchedDebugLevel >= Sched_PrintSchedTrace)
{
cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n";
const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
for (unsigned int s=0; s < S.nslots; s++)
{
cout << " ";
if ((*igroup)[s] != NULL)
cout << * ((*igroup)[s])->getMachineInstr() << endl;
else
cout << "<none>" << endl;
}
}
return numIssued;
}
static void
MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S,
{
// Try to assign every other instruction to a lower numbered
// slot than delayedNodeSlot.
MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
bool noSlotFound = true;
unsigned int s;
for (s=startSlot; s < delayedNodeSlot; s++)
@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S,
for (unsigned i=0;
i < S.getNumChoices() && i < indexForBreakingNode; i++)
{
MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
// If a higher priority instruction cannot be assigned to
// any earlier slots, don't schedule the breaking instruction.
@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S,
}
static void
AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
static unsigned
ChooseOneGroup(SchedulingManager& S)
{
// find the slot to start from, in the current cycle
unsigned int startSlot = 0;
cycles_t curTime = S.getTime();
assert(S.schedPrio.getNumReady() > 0
&& "Don't get here without ready instructions.");
assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
cycles_t firstCycle = S.getTime();
DelaySlotInfo* getDelaySlotInfo = NULL;
// If only one instruction can be issued, do so.
if (maxIssue == 1)
for (unsigned s=startSlot; s < S.nslots; s++)
if (S.getChoicesForSlot(s).size() > 0)
{// found the one instruction
S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
return;
}
// Choose up to `nslots' feasible instructions and their possible slots.
unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
// Otherwise, choose from the choices for each slot
//
InstrGroup* igroup = S.isched.getIGroup(S.getTime());
assert(igroup != NULL && "Group creation failed?");
// Find a slot that has only a single choice, and take it.
// If all slots have 0 or multiple choices, pick the first slot with
// choices and use its last instruction (just to avoid shifting the vector).
unsigned numIssued;
for (numIssued = 0; numIssued < maxIssue; numIssued++)
while (numIssued == 0)
{
int chosenSlot = -1, chosenNodeIndex = -1;
for (unsigned s=startSlot; s < S.nslots; s++)
if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
{
chosenSlot = (int) s;
break;
}
if (chosenSlot == -1)
for (unsigned s=startSlot; s < S.nslots; s++)
if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
{
chosenSlot = (int) s;
break;
}
if (chosenSlot != -1)
{ // Insert the chosen instr in the chosen slot and
// erase it from all slots.
const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
S.scheduleInstr(node, chosenSlot, curTime);
}
S.updateTime(S.getTime()+1);
numIssued = FindSlotChoices(S, getDelaySlotInfo);
}
assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
AssignInstructionsToSlots(S, numIssued);
if (getDelaySlotInfo != NULL)
numIssued += getDelaySlotInfo->scheduleDelayedNode(S);
// Print trace of scheduled instructions before newly ready ones
if (SchedDebugLevel >= Sched_PrintSchedTrace)
{
for (cycles_t c = firstCycle; c <= S.getTime(); c++)
{
cout << " Cycle " << c << " : Scheduled instructions:\n";
const InstrGroup* igroup = S.isched.getIGroup(c);
for (unsigned int s=0; s < S.nslots; s++)
{
cout << " ";
if ((*igroup)[s] != NULL)
cout << * ((*igroup)[s])->getMachineInstr() << endl;
else
cout << "<none>" << endl;
}
}
}
return numIssued;
}
static void
ForwardListSchedule(SchedulingManager& S)
{
unsigned N;
const SchedGraphNode* node;
S.schedPrio.initialize();
while ((N = S.schedPrio.getNumReady()) > 0)
{
cycles_t nextCycle = S.getTime();
// Choose one group of instructions for a cycle, plus any delay slot
// instructions (which may overflow into successive cycles).
// This will advance S.getTime() to the last cycle in which
// instructions are actually issued.
//
unsigned numIssued = ChooseOneGroup(S);
assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
// Notify the priority manager of scheduled instructions and mark
// any successors that may now be ready
//
for (cycles_t c = nextCycle; c <= S.getTime(); c++)
{
const InstrGroup* igroup = S.isched.getIGroup(c);
for (unsigned int s=0; s < S.nslots; s++)
if ((node = (*igroup)[s]) != NULL)
{
S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
MarkSuccessorsReady(S, node);
}
}
// Move to the next the next earliest cycle for which
// an instruction can be issued, or the next earliest in which
// one will be ready, or to the next cycle, whichever is latest.
//
S.updateTime(max(S.getTime() + 1,
max(S.getEarliestIssueTime(),
S.schedPrio.getEarliestReadyTime())));
}
}
//---------------------------------------------------------------------
// Code for filling delay slots for delayed terminator instructions
@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
// when we cannot find single-cycle instructions that can be reordered.
//----------------------------------------------------------------------
static void
ChooseInstructionsForDelaySlots(SchedulingManager& S,
const BasicBlock* bb,
SchedGraph* graph)
{
// Look for instructions that can be used for delay slots.
// Remove them from the graph, and mark them to be used for delay slots.
const MachineInstrInfo& mii = S.getInstrInfo();
const TerminatorInst* term = bb->getTerminator();
MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
// Find the first branch instr in the sequence of machine instrs for term
//
unsigned first = 0;
while (! mii.isBranch(termMvec[first]->getOpCode()))
++first;
assert(first < termMvec.size() &&
"No branch instructions for BR? Ok, but weird! Delete assertion.");
if (first == termMvec.size())
return;
SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]);
assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?");
unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
if (ndelays == 0)
return;
// Use vectors to remember the nodes chosen for delay slots, and the
// NOPs that will be unused. We cannot remove them from the graph while
// walking through the preds and succs of the brNode here, so we
// remember the nodes in the vectors and remove them later.
// We use separate vectors for the single-cycle and multi-cycle nodes,
// so that we can give preference to single-cycle nodes.
//
vector<SchedGraphNode*> sdelayNodeVec;
vector<SchedGraphNode*> mdelayNodeVec;
vector<SchedGraphNode*> nopNodeVec;
unsigned numUseful = 0;
sdelayNodeVec.reserve(ndelays);
for (sg_pred_iterator P = pred_begin(brNode);
P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
if (! (*P)->isDummyNode() &&
! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
{
++numUseful;
if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
mdelayNodeVec.push_back(*P);
else
sdelayNodeVec.push_back(*P);
}
// If not enough single-cycle instructions were found, select the
// lowest-latency multi-cycle instructions and use them.
// Note that this is the most efficient code when only 1 (or even 2)
// values need to be selected.
//
while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
{
unsigned latency;
unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
unsigned minIndex = 0;
for (unsigned i=1; i < mdelayNodeVec.size(); i++)
if (minLatency >=
(latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode())))
{
minLatency = latency;
minIndex = i;
}
sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
}
// Now, remove the NOPs currently in delay slots from the graph.
// If not enough useful instructions were found, use the NOPs to
// fill delay slots, otherwise, just discard them.
for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I)
if (! (*I)->isDummyNode()
&& mii.isNop((*I)->getMachineInstr()->getOpCode()))
{
if (sdelayNodeVec.size() < ndelays)
sdelayNodeVec.push_back(*I);
else
nopNodeVec.push_back(*I);
}
// Mark the nodes chosen for delay slots. This removes them from the graph.
for (unsigned i=0; i < sdelayNodeVec.size(); i++)
MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true);
// And remove the unused NOPs from the graph.
for (unsigned i=0; i < nopNodeVec.size(); i++)
graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
}
bool
static bool
NodeCanFillDelaySlot(const SchedulingManager& S,
const SchedGraphNode* node,
const SchedGraphNode* brNode,
@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
}
void
static void
MarkNodeForDelaySlot(SchedulingManager& S,
SchedGraph* graph,
SchedGraphNode* node,
@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S,
}
void
FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
SchedGraphNode* brNode,
vector<SchedGraphNode*>& sdelayNodeVec)
{
const MachineInstrInfo& mii = S.getInstrInfo();
unsigned ndelays =
mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
if (ndelays == 0)
return;
sdelayNodeVec.reserve(ndelays);
// Use a separate vector to hold the feasible multi-cycle nodes.
// These will be used if not enough single-cycle nodes are found.
//
vector<SchedGraphNode*> mdelayNodeVec;
for (sg_pred_iterator P = pred_begin(brNode);
P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
if (! (*P)->isDummyNode() &&
! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
{
if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
mdelayNodeVec.push_back(*P);
else
sdelayNodeVec.push_back(*P);
}
// If not enough single-cycle instructions were found, select the
// lowest-latency multi-cycle instructions and use them.
// Note that this is the most efficient code when only 1 (or even 2)
// values need to be selected.
//
while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
{
unsigned lmin =
mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
unsigned minIndex = 0;
for (unsigned i=1; i < mdelayNodeVec.size(); i++)
{
unsigned li =
mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
if (lmin >= li)
{
lmin = li;
minIndex = i;
}
}
sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
}
}
// Remove the NOPs currently in delay slots from the graph.
// Mark instructions specified in sdelayNodeVec to replace them.
// If not enough useful instructions were found, mark the NOPs to be used
// for filling delay slots, otherwise, otherwise just discard them.
//
void
ReplaceNopsWithUsefulInstr(SchedulingManager& S,
SchedGraphNode* node,
vector<SchedGraphNode*> sdelayNodeVec,
SchedGraph* graph)
{
vector<SchedGraphNode*> nopNodeVec;
const MachineInstrInfo& mii = S.getInstrInfo();
unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
assert(ndelays > 0 && "Unnecessary call to replace NOPs");
// Remove the NOPs currently in delay slots from the graph.
// If not enough useful instructions were found, use the NOPs to
// fill delay slots, otherwise, just discard them.
for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
if (! (*I)->isDummyNode()
&& mii.isNop((*I)->getMachineInstr()->getOpCode()))
{
if (sdelayNodeVec.size() < ndelays)
sdelayNodeVec.push_back(*I);
else
nopNodeVec.push_back(*I);
}
assert(sdelayNodeVec.size() == ndelays);
// Mark the nodes chosen for delay slots. This removes them from the graph.
for (unsigned i=0; i < sdelayNodeVec.size(); i++)
MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
// And remove the unused NOPs from the graph.
for (unsigned i=0; i < nopNodeVec.size(); i++)
graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
}
// For all delayed instructions, choose instructions to put in the delay
// slots and pull those out of the graph. Mark them for the delay slots
// in the DelaySlotInfo object for that graph node. If no useful work
// is found for a delay slot, use the NOP that is currently in that slot.
//
// We try to fill the delay slots with useful work for all instructions
// except CALLs. For CALLs, it is nearly always possible to use one of the
// call sequence instrs and putting anything else in the delay slot could be
// suboptimal.
//
static void
ChooseInstructionsForDelaySlots(SchedulingManager& S,
const BasicBlock* bb,
SchedGraph* graph)
{
const MachineInstrInfo& mii = S.getInstrInfo();
const TerminatorInst* termInstr = bb->getTerminator();
MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
vector<SchedGraphNode*> delayNodeVec;
const MachineInstr* brInstr;
assert(termInstr->getOpcode() != Instruction::Call
&& "Call used as terminator?");
// To find instructions that need delay slots without searching the entire
// machine code, we assume the only delayed instructions are CALLs or
// instructions generated for the terminator inst.
// Find the first branch instr in the sequence of machine instrs for term
//
unsigned first = 0;
while (first < termMvec.size() &&
! mii.isBranch(termMvec[first]->getOpCode()))
{
++first;
}
assert(first < termMvec.size() &&
"No branch instructions for BR? Ok, but weird! Delete assertion.");
brInstr = (first < termMvec.size())? termMvec[first] : NULL;
// Compute a vector of the nodes chosen for delay slots and then
// mark delay slots to replace NOPs with these useful instructions.
//
if (brInstr != NULL)
{
SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
}
// Also mark delay slots for other delayed instructions to hold NOPs.
// Simply passing in an empty delayNodeVec will have this effect.
//
delayNodeVec.clear();
const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
for (unsigned i=0; i < bbMvec.size(); i++)
if (bbMvec[i] != brInstr &&
mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
{
SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
}
}
//
// Schedule the delayed branch and its delay slots
//
void
unsigned
DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
{
assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
break;
}
return 1 + ndelays;
}
// Check if the instruction would conflict with instructions already
// chosen for the current cycle
//
static inline bool
ConflictsWithChoices(const SchedulingManager& S,
MachineOpCode opCode)
{
// Check if the instruction must issue by itself, and some feasible
// choices have already been made for this cycle
if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
return true;
// For each class that opCode belongs to, check if there are too many
// instructions of that class.
//
const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
}
//************************* External Functions *****************************/
//---------------------------------------------------------------------------
// Function: ViolatesMinimumGap
//
// Purpose:
// Check minimum gap requirements relative to instructions scheduled in
// previous cycles.
// Note that we do not need to consider `nextEarliestIssueTime' here because
// that is also captured in the earliest start times for each opcode.
//---------------------------------------------------------------------------
static inline bool
ViolatesMinimumGap(const SchedulingManager& S,
MachineOpCode opCode,
const cycles_t inCycle)
{
return (inCycle < S.getEarliestStartTimeForOp(opCode));
}
//---------------------------------------------------------------------------
// Function: instrIsFeasible
//
// Purpose:
// Check if any issue restrictions would prevent the instruction from
// being issued in the current cycle
//---------------------------------------------------------------------------
bool
instrIsFeasible(const SchedulingManager& S,
MachineOpCode opCode)
{
// skip the instruction if it cannot be issued due to issue restrictions
// caused by previously issued instructions
if (ViolatesMinimumGap(S, opCode, S.getTime()))
return false;
// skip the instruction if it cannot be issued due to issue restrictions
// caused by previously chosen instructions for the current cycle
if (ConflictsWithChoices(S, opCode))
return false;
return true;
}
//---------------------------------------------------------------------------
// Function: ScheduleInstructionsWithSSA
//
// Purpose:
// Entry point for instruction scheduling on SSA form.
// Schedules the machine instructions generated by instruction selection.
// Assumes that register allocation has not been done, i.e., operands
// are still in SSA form.
//---------------------------------------------------------------------------
bool
ScheduleInstructionsWithSSA(Method* method,
const TargetMachine &target)
{
SchedGraphSet graphSet(method, target);
if (SchedDebugLevel >= Sched_PrintSchedGraphs)
{
cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
<< endl;
graphSet.dump();
}
for (SchedGraphSet::const_iterator GI=graphSet.begin();
GI != graphSet.end(); ++GI)
{
SchedGraph* graph = (*GI).second;
const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
const BasicBlock* bb = bbvec[0];
if (SchedDebugLevel >= Sched_PrintSchedTrace)
cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
SchedPriorities schedPrio(method, graph); // expensive!
SchedulingManager S(target, graph, schedPrio);
ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
ForwardListSchedule(S); // computes schedule in S
RecordSchedule((*GI).first, S); // records schedule in BB
}
if (SchedDebugLevel >= Sched_PrintMachineCode)
{
cout << endl
<< "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
PrintMachineInstructions(method);
}
return false; // no reason to fail yet
}