1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[DFAPacketizer] Allow up to 64 functional units

Summary:
To drive the automaton we used a uint64_t as an action type. This
contained the transition's resource requirements as a conjunction:

  (a OR b) AND (b OR c)

We encoded this conjunction as a sequence of four 16-bit bitmasks.
This limited the number of addressable functional units to 16, which
is quite low and has bitten many people in the past.

Instead, the DFAEmitter now generates a lookup table from InstrItinerary
class (index of the ItinData inside the ProcItineraries) to an internal
action index which is essentially a dense embedding of the conjunctive
form. Because we never materialize the conjunctive form, we no longer
have the 16 FU restriction.

In this patch we limit to 64 functional units due to using a uint64_t
bitmask in the DFAEmitter. Now that we've decoupled these representations
we can increase this in future.

Reviewers: ThomasRaoux, kparzysz, majnemer

Reviewed By: ThomasRaoux

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69110
This commit is contained in:
jmolloy 2019-11-04 19:25:13 +00:00 committed by James Molloy
parent 829090e5cd
commit 548ef37194
3 changed files with 156 additions and 425 deletions

View File

@ -46,43 +46,18 @@ class MCInstrDesc;
class SUnit;
class TargetInstrInfo;
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
//
// e.g. terms x resource bit combinations that fit in uint32_t:
// 4 terms x 8 bits = 32 bits
// 3 terms x 10 bits = 30 bits
// 2 terms x 16 bits = 32 bits
//
// e.g. terms x resource bit combinations that fit in uint64_t:
// 8 terms x 8 bits = 64 bits
// 7 terms x 9 bits = 63 bits
// 6 terms x 10 bits = 60 bits
// 5 terms x 12 bits = 60 bits
// 4 terms x 16 bits = 64 bits <--- current
// 3 terms x 21 bits = 63 bits
// 2 terms x 32 bits = 64 bits
//
#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
using DFAInput = uint64_t;
using DFAStateInput = int64_t;
#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
// --------------------------------------------------------------------
class DFAPacketizer {
private:
const InstrItineraryData *InstrItins;
Automaton<DFAInput> A;
Automaton<uint64_t> A;
/// For every itinerary, an "action" to apply to the automaton. This removes
/// the redundancy in actions between itinerary classes.
ArrayRef<unsigned> ItinActions;
public:
DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a) :
InstrItins(InstrItins), A(std::move(a)) {
DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a,
ArrayRef<unsigned> ItinActions)
: InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) {
// Start off with resource tracking disabled.
A.enableTranscription(false);
}
@ -99,12 +74,6 @@ public:
A.enableTranscription(Track);
}
// Return the DFAInput for an instruction class.
DFAInput getInsnInput(unsigned InsnClass);
// Return the DFAInput for an instruction class input vector.
static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass);
// Check if the resources occupied by a MCInstrDesc are available in
// the current state.
bool canReserveResources(const MCInstrDesc *MID);

View File

@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
static unsigned InstrCount = 0;
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}
/// Return the DFAInput for an instruction class input vector.
/// This function is used in both DFAPacketizer.cpp and in
/// DFAPacketizerEmitter.cpp.
static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
DFAInput InsnInput = 0;
assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
"Exceeded maximum number of DFA terms");
for (auto U : InsnClass)
InsnInput = addDFAFuncUnits(InsnInput, U);
return InsnInput;
}
// --------------------------------------------------------------------
// Make sure DFA types are large enough for the number of terms & resources.
static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
(8 * sizeof(DFAInput)),
"(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
static_assert(
(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
"(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
// Return the DFAInput for an instruction class.
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
// Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
DFAInput InsnInput = 0;
unsigned i = 0;
(void)i;
for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
*IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
}
return InsnInput;
}
// Return the DFAInput for an instruction class input vector.
DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
return getDFAInsnInput(InsnClass);
}
// Check if the resources occupied by a MCInstrDesc are available in the
// current state.
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
return A.canAdd(InsnInput);
unsigned Action = ItinActions[MID->getSchedClass()];
if (MID->getSchedClass() == 0 || Action == 0)
return false;
return A.canAdd(Action);
}
// Reserve the resources occupied by a MCInstrDesc and change the current
// state to reflect that change.
void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
A.add(InsnInput);
unsigned Action = ItinActions[MID->getSchedClass()];
if (MID->getSchedClass() == 0 || Action == 0)
return;
A.add(Action);
}
// Check if the resources occupied by a machine instruction are available

View File

@ -16,15 +16,16 @@
#define DEBUG_TYPE "dfa-emitter"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "DFAEmitter.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
#include <map>
@ -35,216 +36,101 @@
using namespace llvm;
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
//
// e.g. terms x resource bit combinations that fit in uint32_t:
// 4 terms x 8 bits = 32 bits
// 3 terms x 10 bits = 30 bits
// 2 terms x 16 bits = 32 bits
//
// e.g. terms x resource bit combinations that fit in uint64_t:
// 8 terms x 8 bits = 64 bits
// 7 terms x 9 bits = 63 bits
// 6 terms x 10 bits = 60 bits
// 5 terms x 12 bits = 60 bits
// 4 terms x 16 bits = 64 bits <--- current
// 3 terms x 21 bits = 63 bits
// 2 terms x 32 bits = 64 bits
//
#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
typedef uint64_t DFAInput;
typedef int64_t DFAStateInput;
#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
// We use a uint64_t to represent a resource bitmask.
#define DFA_MAX_RESOURCES 64
namespace {
using ResourceVector = SmallVector<uint64_t, 4>;
DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}
struct ScheduleClass {
/// The parent itinerary index (processor model ID).
unsigned ItineraryID;
/// Return the DFAInput for an instruction class input vector.
/// This function is used in both DFAPacketizer.cpp and in
/// DFAPacketizerEmitter.cpp.
DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
DFAInput InsnInput = 0;
assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
"Exceeded maximum number of DFA terms");
for (auto U : InsnClass)
InsnInput = addDFAFuncUnits(InsnInput, U);
return InsnInput;
}
/// Index within this itinerary of the schedule class.
unsigned Idx;
} // end anonymous namespace
/// The index within the uniqued set of required resources of Resources.
unsigned ResourcesIdx;
// --------------------------------------------------------------------
#ifndef NDEBUG
// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
//
// dbgsInsnClass - When debugging, print instruction class stages.
//
void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
//
// dbgsStateInfo - When debugging, print the set of state info.
//
void dbgsStateInfo(const std::set<unsigned> &stateInfo);
//
// dbgsIndent - When debugging, indent by the specified amount.
//
void dbgsIndent(unsigned indent);
#endif
//
// class DFAPacketizerEmitter: class that generates and prints out the DFA
// for resource tracking.
//
namespace {
/// Conjunctive list of resource requirements:
/// {a|b, b|c} => (a OR b) AND (b or c).
/// Resources are unique across all itineraries.
ResourceVector Resources;
};
// Generates and prints out the DFA for resource tracking.
class DFAPacketizerEmitter {
private:
std::string TargetName;
//
// allInsnClasses is the set of all possible resources consumed by an
// InstrStage.
//
std::vector<std::vector<unsigned>> allInsnClasses;
RecordKeeper &Records;
UniqueVector<ResourceVector> UniqueResources;
std::vector<ScheduleClass> ScheduleClasses;
std::map<std::string, uint64_t> FUNameToBitsMap;
std::map<unsigned, uint64_t> ComboBitToBitsMap;
public:
DFAPacketizerEmitter(RecordKeeper &R);
//
// collectAllFuncUnits - Construct a map of function unit names to bits.
//
int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
int &maxResources,
raw_ostream &OS);
// Construct a map of function unit names to bits.
int collectAllFuncUnits(
ArrayRef<const CodeGenProcModel *> ProcModels);
//
// collectAllComboFuncs - Construct a map from a combo function unit bit to
// the bits of all included functional units.
//
int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
raw_ostream &OS);
// Construct a map from a combo function unit bit to the bits of all included
// functional units.
int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList);
//
// collectOneInsnClass - Populate allInsnClasses with one instruction class.
//
int collectOneInsnClass(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
Record *ItinData,
raw_ostream &OS);
//
// collectAllInsnClasses - Populate allInsnClasses which is a set of units
// used in each stage.
//
int collectAllInsnClasses(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::vector<Record*> &ItinDataList,
int &maxStages,
raw_ostream &OS);
ResourceVector getResourcesForItinerary(Record *Itinerary);
void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries);
// Emit code for a subset of itineraries.
void emitForItineraries(raw_ostream &OS,
std::vector<Record *> &ProcItinList,
std::vector<const CodeGenProcModel *> &ProcItinList,
std::string DFAName);
void run(raw_ostream &OS);
};
} // end anonymous namespace
#ifndef NDEBUG
// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
//
// dbgsInsnClass - When debugging, print instruction class stages.
//
void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
LLVM_DEBUG(dbgs() << "InsnClass: ");
for (unsigned i = 0; i < InsnClass.size(); ++i) {
if (i > 0) {
LLVM_DEBUG(dbgs() << ", ");
}
LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
}
DFAInput InsnInput = getDFAInsnInput(InsnClass);
LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
}
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R)
: TargetName(CodeGenTarget(R).getName()), Records(R) {}
//
// dbgsIndent - When debugging, indent by the specified amount.
//
void dbgsIndent(unsigned indent) {
for (unsigned i = 0; i < indent; ++i) {
LLVM_DEBUG(dbgs() << " ");
}
}
#endif // NDEBUG
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
TargetName(CodeGenTarget(R).getName()), Records(R) {}
//
// collectAllFuncUnits - Construct a map of function unit names to bits.
//
int DFAPacketizerEmitter::collectAllFuncUnits(
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
int &maxFUs,
raw_ostream &OS) {
ArrayRef<const CodeGenProcModel *> ProcModels) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllFuncUnits");
LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
LLVM_DEBUG(dbgs() << " (" << ProcModels.size() << " itineraries)\n");
std::set<Record *> ProcItinList;
for (const CodeGenProcModel *Model : ProcModels)
ProcItinList.insert(Model->ItinsDef);
int totalFUs = 0;
// Parse functional units for all the itineraries.
for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
Record *Proc = ProcItinList[i];
std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
for (Record *Proc : ProcItinList) {
std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU");
LLVM_DEBUG(dbgs() << " FU:" << i << " (" << FUs.size() << " FUs) "
<< Proc->getName());
LLVM_DEBUG(dbgs() << " FU:"
<< " (" << FUs.size() << " FUs) " << Proc->getName());
// Convert macros to bits for each stage.
unsigned numFUs = FUs.size();
for (unsigned j = 0; j < numFUs; ++j) {
assert ((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of representable resources");
unsigned FuncResources = (unsigned) (1U << j);
assert((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of representable resources");
uint64_t FuncResources = 1ULL << j;
FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
<< Twine::utohexstr(FuncResources));
}
if (((int) numFUs) > maxFUs) {
maxFUs = numFUs;
}
totalFUs += numFUs;
LLVM_DEBUG(dbgs() << "\n");
}
return totalFUs;
}
//
// collectAllComboFuncs - Construct a map from a combo function unit bit to
// the bits of all included functional units.
//
int DFAPacketizerEmitter::collectAllComboFuncs(
std::vector<Record*> &ComboFuncList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
raw_ostream &OS) {
int DFAPacketizerEmitter::collectAllComboFuncs(ArrayRef<Record *> ComboFuncList) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
@ -253,27 +139,27 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
int numCombos = 0;
for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
Record *Func = ComboFuncList[i];
std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD");
LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) "
<< Func->getName() << "\n");
// Convert macros to bits for each stage.
for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
assert ((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of DFA resources");
assert((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of DFA resources");
Record *FuncData = FUs[j];
Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
const std::vector<Record*> &FuncList =
FuncData->getValueAsListOfDefs("FuncList");
const std::vector<Record *> &FuncList =
FuncData->getValueAsListOfDefs("FuncList");
const std::string &ComboFuncName = ComboFunc->getName();
unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
unsigned ComboResources = ComboBit;
uint64_t ComboBit = FUNameToBitsMap[ComboFuncName];
uint64_t ComboResources = ComboBit;
LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x"
<< Twine::utohexstr(ComboResources) << "\n");
for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
std::string FuncName = FuncList[k]->getName();
unsigned FuncResources = FUNameToBitsMap[FuncName];
uint64_t FuncResources = FUNameToBitsMap[FuncName];
LLVM_DEBUG(dbgs() << " " << FuncName << ":0x"
<< Twine::utohexstr(FuncResources) << "\n");
ComboResources |= FuncResources;
@ -288,101 +174,33 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
return numCombos;
}
//
// collectOneInsnClass - Populate allInsnClasses with one instruction class
//
int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
Record *ItinData,
raw_ostream &OS) {
const std::vector<Record*> &StageList =
ItinData->getValueAsListOfDefs("Stages");
// The number of stages.
unsigned NStages = StageList.size();
LLVM_DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName()
<< "\n");
std::vector<unsigned> UnitBits;
// Compute the bitwise or of each unit used in this stage.
for (unsigned i = 0; i < NStages; ++i) {
const Record *Stage = StageList[i];
// Get unit list.
const std::vector<Record*> &UnitList =
Stage->getValueAsListOfDefs("Units");
LLVM_DEBUG(dbgs() << " stage:" << i << " [" << UnitList.size()
<< " units]:");
unsigned dbglen = 26; // cursor after stage dbgs
// Compute the bitwise or of each unit used in this stage.
unsigned UnitBitValue = 0;
for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
// Conduct bitwise or.
std::string UnitName = UnitList[j]->getName();
LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName);
dbglen += 3 + UnitName.length();
assert(FUNameToBitsMap.count(UnitName));
UnitBitValue |= FUNameToBitsMap[UnitName];
ResourceVector
DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
ResourceVector Resources;
assert(Itinerary);
for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
uint64_t StageResources = 0;
for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
StageResources |= FUNameToBitsMap[Unit->getName()];
}
if (UnitBitValue != 0)
UnitBits.push_back(UnitBitValue);
while (dbglen <= 64) { // line up bits dbgs
dbglen += 8;
LLVM_DEBUG(dbgs() << "\t");
}
LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue)
<< ")\n");
if (StageResources != 0)
Resources.push_back(StageResources);
}
if (!UnitBits.empty())
allInsnClasses.push_back(UnitBits);
LLVM_DEBUG({
dbgs() << " ";
dbgsInsnClass(UnitBits);
dbgs() << "\n";
});
return NStages;
return Resources;
}
//
// collectAllInsnClasses - Populate allInsnClasses which is a set of units
// used in each stage.
//
int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::vector<Record*> &ItinDataList,
int &maxStages,
raw_ostream &OS) {
// Collect all instruction classes.
unsigned M = ItinDataList.size();
int numInsnClasses = 0;
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n"
<< "collectAllInsnClasses " << ProcName << " (" << M
<< " classes)\n");
// Collect stages for each instruction class for all itinerary data
for (unsigned j = 0; j < M; j++) {
Record *ItinData = ItinDataList[j];
int NStages = collectOneInsnClass(ProcName, ProcItinList,
FUNameToBitsMap, ItinData, OS);
if (NStages > maxStages) {
maxStages = NStages;
void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
const RecVec &Itineraries) {
unsigned Idx = 0;
for (Record *Itinerary : Itineraries) {
if (!Itinerary) {
ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}});
continue;
}
numInsnClasses++;
ResourceVector Resources = getResourcesForItinerary(Itinerary);
ScheduleClasses.push_back(
{ItineraryIdx, Idx++, UniqueResources.insert(Resources), Resources});
}
return numInsnClasses;
}
//
@ -393,19 +211,17 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
<< "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
OS << "namespace llvm {\n";
OS << "\n// Input format:\n";
OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
<< "\t// maximum AND'ed resource terms\n";
OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
<< "\t// maximum resource bits in one term\n";
CodeGenTarget CGT(Records);
CodeGenSchedModels CGS(Records, CGT);
// Collect processor iteraries.
std::vector<Record*> ProcItinList =
Records.getAllDerivedDefinitions("ProcessorItineraries");
std::unordered_map<std::string, std::vector<Record*>> ItinsByNamespace;
for (Record *R : ProcItinList)
ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R);
std::unordered_map<std::string, std::vector<const CodeGenProcModel *>>
ItinsByNamespace;
for (const CodeGenProcModel &ProcModel : CGS.procModels()) {
if (ProcModel.hasItineraries()) {
auto NS = ProcModel.ItinsDef->getValueAsString("PacketizerNamespace");
ItinsByNamespace[NS].push_back(&ProcModel);
}
}
for (auto &KV : ItinsByNamespace)
emitForItineraries(OS, KV.second, KV.first);
@ -413,80 +229,68 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
}
void DFAPacketizerEmitter::emitForItineraries(
raw_ostream &OS, std::vector<Record *> &ProcItinList,
raw_ostream &OS, std::vector<const CodeGenProcModel *> &ProcModels,
std::string DFAName) {
//
// Collect the Functional units.
//
std::map<std::string, unsigned> FUNameToBitsMap;
int maxResources = 0;
collectAllFuncUnits(ProcItinList,
FUNameToBitsMap, maxResources, OS);
OS << "} // end namespace llvm\n\n";
OS << "namespace {\n";
collectAllFuncUnits(ProcModels);
collectAllComboFuncs(Records.getAllDerivedDefinitions("ComboFuncUnits"));
//
// Collect the Combo Functional units.
//
std::map<unsigned, unsigned> ComboBitToBitsMap;
std::vector<Record*> ComboFuncList =
Records.getAllDerivedDefinitions("ComboFuncUnits");
collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS);
//
// Collect the itineraries.
//
int maxStages = 0;
int numInsnClasses = 0;
for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
Record *Proc = ProcItinList[i];
// Get processor itinerary name.
const std::string &ProcName = Proc->getName();
// Skip default.
if (ProcName == "NoItineraries")
continue;
// Sanity check for at least one instruction itinerary class.
unsigned NItinClasses =
Records.getAllDerivedDefinitions("InstrItinClass").size();
if (NItinClasses == 0)
return;
// Get itinerary data list.
std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
// Collect all instruction classes
numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
FUNameToBitsMap, ItinDataList, maxStages, OS);
DenseMap<const CodeGenProcModel *, unsigned> ProcModelStartIdx;
for (const CodeGenProcModel *Model : ProcModels) {
assert(Model->hasItineraries());
ProcModelStartIdx[Model] = ScheduleClasses.size();
createScheduleClasses(Model->Index, Model->ItinDefList);
}
// Output the mapping from ScheduleClass to ResourcesIdx.
unsigned Idx = 0;
OS << "unsigned " << TargetName << DFAName << "ResourceIndices[] = {";
for (const ScheduleClass &SC : ScheduleClasses) {
if (Idx++ % 32 == 0)
OS << "\n ";
OS << SC.ResourcesIdx << ", ";
}
OS << "\n};\n\n";
// And the mapping from Itinerary index into the previous table.
OS << "unsigned " << TargetName << DFAName
<< "ProcResourceIndexStart[] = {\n";
OS << " 0, // NoSchedModel\n";
for (const CodeGenProcModel *Model : ProcModels) {
OS << " " << ProcModelStartIdx[Model] << ", // " << Model->ModelName
<< "\n";
}
OS << ScheduleClasses.size() << "\n};\n\n";
// The type of a state in the nondeterministic automaton we're defining.
using NfaStateTy = unsigned;
using NfaStateTy = uint64_t;
// Given a resource state, return all resource states by applying
// InsnClass.
auto applyInsnClass = [&](ArrayRef<unsigned> InsnClass,
NfaStateTy State) -> std::deque<unsigned> {
std::deque<unsigned> V(1, State);
auto applyInsnClass = [&](const ResourceVector &InsnClass,
NfaStateTy State) -> std::deque<NfaStateTy> {
std::deque<NfaStateTy> V(1, State);
// Apply every stage in the class individually.
for (unsigned Stage : InsnClass) {
for (NfaStateTy Stage : InsnClass) {
// Apply this stage to every existing member of V in turn.
size_t Sz = V.size();
for (unsigned I = 0; I < Sz; ++I) {
unsigned S = V.front();
NfaStateTy S = V.front();
V.pop_front();
// For this stage, state combination, try all possible resources.
for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) {
unsigned ResourceMask = 1U << J;
NfaStateTy ResourceMask = 1ULL << J;
if ((ResourceMask & Stage) == 0)
// This resource isn't required by this stage.
continue;
unsigned Combo = ComboBitToBitsMap[ResourceMask];
NfaStateTy Combo = ComboBitToBitsMap[ResourceMask];
if (Combo && ((~S & Combo) != Combo))
// This combo units bits are not available.
continue;
unsigned ResultingResourceState = S | ResourceMask | Combo;
NfaStateTy ResultingResourceState = S | ResourceMask | Combo;
if (ResultingResourceState == S)
continue;
V.push_back(ResultingResourceState);
@ -499,9 +303,9 @@ void DFAPacketizerEmitter::emitForItineraries(
// Given a resource state, return a quick (conservative) guess as to whether
// InsnClass can be applied. This is a filter for the more heavyweight
// applyInsnClass.
auto canApplyInsnClass = [](ArrayRef<unsigned> InsnClass,
auto canApplyInsnClass = [](const ResourceVector &InsnClass,
NfaStateTy State) -> bool {
for (unsigned Resources : InsnClass) {
for (NfaStateTy Resources : InsnClass) {
if ((State | Resources) == State)
return false;
}
@ -515,20 +319,18 @@ void DFAPacketizerEmitter::emitForItineraries(
while (!Worklist.empty()) {
NfaStateTy State = Worklist.front();
Worklist.pop_front();
for (unsigned i = 0; i < allInsnClasses.size(); i++) {
const std::vector<unsigned> &InsnClass = allInsnClasses[i];
if (!canApplyInsnClass(InsnClass, State))
for (const ResourceVector &Resources : UniqueResources) {
if (!canApplyInsnClass(Resources, State))
continue;
for (unsigned NewState : applyInsnClass(InsnClass, State)) {
unsigned ResourcesID = UniqueResources.idFor(Resources);
for (uint64_t NewState : applyInsnClass(Resources, State)) {
if (SeenStates.emplace(NewState).second)
Worklist.emplace_back(NewState);
Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass));
Emitter.addTransition(State, NewState, ResourcesID);
}
}
}
OS << "} // end namespace llvm\n\n";
OS << "namespace {\n";
std::string TargetAndDFAName = TargetName + DFAName;
Emitter.emit(TargetAndDFAName, OS);
OS << "} // end anonymous namespace\n\n";
@ -541,7 +343,13 @@ void DFAPacketizerEmitter::emitForItineraries(
<< " static Automaton<uint64_t> A(ArrayRef<" << TargetAndDFAName
<< "Transition>(" << TargetAndDFAName << "Transitions), "
<< TargetAndDFAName << "TransitionInfo);\n"
<< " return new DFAPacketizer(IID, A);\n"
<< " unsigned ProcResIdxStart = " << TargetAndDFAName
<< "ProcResourceIndexStart[IID->SchedModel.ProcID];\n"
<< " unsigned ProcResIdxNum = " << TargetAndDFAName
<< "ProcResourceIndexStart[IID->SchedModel.ProcID + 1] - "
"ProcResIdxStart;\n"
<< " return new DFAPacketizer(IID, A, {&" << TargetAndDFAName
<< "ResourceIndices[ProcResIdxStart], ProcResIdxNum});\n"
<< "\n}\n\n";
}