1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[DFAPacketizer] Track resources for packetized instructions

This patch allows the DFAPacketizer to be queried after a packet is formed to work out which
resources were allocated to the packetized instructions.

This is particularly important for targets that do their own bundle packing - it's not
sufficient to know simply that instructions can share a packet; which slots are used is
also required for encoding.

This extends the emitter to emit a side-table containing resource usage diffs for each
state transition. The packetizer maintains a set of all possible resource states in its
current state. After packetization is complete, all remaining resource states are
possible packetization strategies.

The side-table is only ~500K for Hexagon, but the extra tracking is disabled by default
(most users of the packetizer, such as MachinePipeliner, don't care and don't need the
extra maintained state).

Differential Revision: https://reviews.llvm.org/D66936

llvm-svn: 371198
This commit is contained in:
James Molloy 2019-09-06 12:20:08 +00:00
parent adbf1f1abb
commit bd5b6d09b9
5 changed files with 229 additions and 60 deletions

View File

@ -82,20 +82,53 @@ private:
int CurrentState = 0;
const DFAStateInput (*DFAStateInputTable)[2];
const unsigned *DFAStateEntryTable;
const std::pair<unsigned, unsigned> *DFAResourceTransitionTable;
const unsigned *DFAResourceTransitionEntryTable;
// CachedTable is a map from <FromState, Input> to ToState.
DenseMap<UnsignPair, unsigned> CachedTable;
// CachedResourceTransitions is a map from <FromState, Input> to a list of
// resource transitions.
DenseMap<UnsignPair, ArrayRef<std::pair<unsigned, unsigned>>>
CachedResourceTransitions;
// Read the DFA transition table and update CachedTable.
void ReadTable(unsigned state);
bool TrackResources = false;
// State for the current packet. Every entry is a possible packing of the
// bundle, indexed by cumulative resource state. Each entry is a list of the
// cumulative resource states after packing each instruction. For example if
// we pack I0: [0x4] and I1: [0x2] we will end up with:
// ResourceStates[0x6] = [0x4, 0x6]
DenseMap<unsigned, SmallVector<unsigned, 8>> ResourceStates;
public:
DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
const unsigned *SET);
const unsigned *SET,
const std::pair<unsigned, unsigned> *RTT = nullptr,
const unsigned *RTET = nullptr);
// Reset the current state to make all resources available.
void clearResources() {
CurrentState = 0;
ResourceStates.clear();
ResourceStates[0] = {};
}
// Enable or disable resource tracking. With tracking on, the packetizer
// records not only whether instructions can be packetized together but also
// which functional units each instruction ends up using.
//
// Must only be called while the packetizer is empty (CurrentState == 0).
void setTrackResources(bool Track) {
  const bool Enabling = Track && !TrackResources;
  TrackResources = Track;
  if (Enabling) {
    // Drop the cached transitions so that ReadTable() re-reads them and
    // fills in the resource transitions as well.
    CachedTable.clear();
    assert(DFAResourceTransitionEntryTable);
    assert(DFAResourceTransitionTable);
  }
  assert(CurrentState == 0 && "Can only change TrackResources on an empty packetizer!");
}
// Return the DFAInput for an instruction class.
@ -120,6 +153,15 @@ public:
// current state to reflect that change.
void reserveResources(MachineInstr &MI);
// Return the resources used by the InstIdx'th instruction added to this
// packet. The resources are returned as a bitvector of functional units.
//
// Note that a bundle may be packed in multiple valid ways. This function
// returns one arbitrary valid packing.
//
// Requires setTrackResources(true) to have been called.
unsigned getUsedResources(unsigned InstIdx);
// Accessor for the itinerary data this packetizer was constructed with.
const InstrItineraryData *getInstrItins() const {
  return InstrItins;
}
};

View File

@ -23,6 +23,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@ -72,9 +73,11 @@ static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
// --------------------------------------------------------------------
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
const DFAStateInput (*SIT)[2],
const unsigned *SET):
InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
const DFAStateInput (*SIT)[2], const unsigned *SET,
const std::pair<unsigned, unsigned> *RTT,
const unsigned *RTET)
: InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET),
DFAResourceTransitionTable(RTT), DFAResourceTransitionEntryTable(RTET) {
// Make sure DFA types are large enough for the number of terms & resources.
static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
(8 * sizeof(DFAInput)),
@ -82,6 +85,7 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
static_assert(
(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
"(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
clearResources();
}
// Read the DFA transition table and update CachedTable.
@ -93,16 +97,26 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
// for the ith state
//
void DFAPacketizer::ReadTable(unsigned int state) {
unsigned ThisState = DFAStateEntryTable[state];
unsigned NextStateInTable = DFAStateEntryTable[state+1];
unsigned ThisStateIdx = DFAStateEntryTable[state];
unsigned NextStateIdxInTable = DFAStateEntryTable[state + 1];
// Early exit in case CachedTable already contains this
// state's transitions.
if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisStateIdx][0])))
return;
for (unsigned i = ThisState; i < NextStateInTable; i++)
CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
DFAStateInputTable[i][1];
for (unsigned TransitionIdx = ThisStateIdx;
TransitionIdx < NextStateIdxInTable; TransitionIdx++) {
auto TransitionPair =
UnsignPair(state, DFAStateInputTable[TransitionIdx][0]);
CachedTable[TransitionPair] = DFAStateInputTable[TransitionIdx][1];
if (TrackResources) {
unsigned I = DFAResourceTransitionEntryTable[TransitionIdx];
unsigned E = DFAResourceTransitionEntryTable[TransitionIdx + 1];
CachedResourceTransitions[TransitionPair] = makeArrayRef(
&DFAResourceTransitionTable[I], &DFAResourceTransitionTable[E]);
}
}
}
// Return the DFAInput for an instruction class.
@ -141,6 +155,16 @@ void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
if (TrackResources) {
DenseMap<unsigned, SmallVector<unsigned, 8>> NewResourceStates;
for (const auto &KV : CachedResourceTransitions[StateTrans]) {
assert(ResourceStates.count(KV.first));
NewResourceStates[KV.second] = ResourceStates[KV.first];
NewResourceStates[KV.second].push_back(KV.second);
}
ResourceStates = NewResourceStates;
}
assert(CachedTable.count(StateTrans) != 0);
CurrentState = CachedTable[StateTrans];
}
@ -159,6 +183,21 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) {
reserveResources(&MID);
}
unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
assert(TrackResources && "getUsedResources requires resource tracking!");
// Assert that there is at least one example of a valid bundle format.
assert(!ResourceStates.empty() && "Invalid bundle!");
SmallVectorImpl<unsigned> &RS = ResourceStates.begin()->second;
// RS stores the cumulative resources used up to and including the I'th
// instruction. The 0th instruction is the base case.
if (InstIdx == 0)
return RS[0];
// Return the difference between the cumulative resources used by InstIdx and
// its predecessor.
return RS[InstIdx] ^ RS[InstIdx - 1];
}
namespace llvm {
// This class extends ScheduleDAGInstrs and overrides the schedule method
@ -210,6 +249,7 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
MachineLoopInfo &mli, AliasAnalysis *aa)
: MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
ResourceTracker->setTrackResources(true);
VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
}
@ -224,8 +264,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
LLVM_DEBUG({
if (!CurrentPacketMIs.empty()) {
dbgs() << "Finalizing packet:\n";
for (MachineInstr *MI : CurrentPacketMIs)
dbgs() << " * " << *MI;
unsigned Idx = 0;
for (MachineInstr *MI : CurrentPacketMIs) {
unsigned R = ResourceTracker->getUsedResources(Idx++);
dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
}
}
});
if (CurrentPacketMIs.size() > 1) {

View File

@ -24,6 +24,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@ -1763,6 +1764,16 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator EndMI) {
// Replace VLIWPacketizerList::endPacket(MBB, EndMI).
LLVM_DEBUG({
if (!CurrentPacketMIs.empty()) {
dbgs() << "Finalizing packet:\n";
unsigned Idx = 0;
for (MachineInstr *MI : CurrentPacketMIs) {
unsigned R = ResourceTracker->getUsedResources(Idx++);
dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
}
}
});
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {

View File

@ -0,0 +1,29 @@
; RUN: llc -O2 -march=hexagon < %s -debug-only=packets 2>&1 | FileCheck %s
; REQUIRES: asserts
; CHECK: Finalizing packet:
; CHECK-NEXT: * [res:0x8] renamable $r1 = S2_vsplatrb renamable $r0
; CHECK-NEXT: * [res:0x4] renamable $d1 = S2_vsplatrh killed renamable $r0
target triple = "hexagon"
; Function Attrs: nounwind readnone
define i64 @f0(i64 %a0) #0 {
b0:
%v0 = trunc i64 %a0 to i32
%v1 = and i32 %v0, 65535
%v2 = tail call i64 @llvm.hexagon.S2.vsplatrh(i32 %v1)
%v3 = and i32 %v0, 255
%v4 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v3)
%v5 = sext i32 %v4 to i64
%v6 = add nsw i64 %v5, %v2
ret i64 %v6
}
; Function Attrs: nounwind readnone
declare i64 @llvm.hexagon.S2.vsplatrh(i32) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S2.vsplatrb(i32) #0
attributes #0 = { nounwind readnone }

View File

@ -192,7 +192,14 @@ class State {
const int stateNum;
mutable bool isInitial;
mutable std::set<unsigned> stateInfo;
typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
// Everything recorded for one outgoing transition of a State.
// Instances are brace-initialised as {ResourceTransitions, S}, so the member
// order is significant.
struct TransitionInfo {
// Pairs of (resource bitmask in this state, equivalent resource bitmask in
// the transitioned-to state). The same source bitmask may appear in several
// pairs, i.e. this is a 1-to-N mapping.
std::vector<std::pair<unsigned, unsigned>> ResourceTransitions;
// The state this transition leads to.
const State *S;
};
using TransitionMap = std::map<std::vector<unsigned>, TransitionInfo>;
mutable TransitionMap Transitions;
State();
@ -221,9 +228,14 @@ class State {
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
//
void AddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates) const;
// TransitionInfo maps from a resource bitmask B in this state to a resource
// bitmask B' in PossibleStates. This is a one-to-many (or none) mapping.
//
void AddInsnClass(
std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates,
std::vector<std::pair<unsigned, unsigned>> &TransitionInfo) const;
//
// AddInsnClassStages - Return all combinations of resource reservation
@ -231,16 +243,17 @@ class State {
// which are possible from this state (PossibleStates).
//
void AddInsnClassStages(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
unsigned chkstage, unsigned numstages,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const;
std::map<unsigned, unsigned> &ComboBitToBitsMap,
unsigned chkstage, unsigned numstages,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates) const;
//
// addTransition - Add a transition from this state given the input InsnClass
// addTransition - Add a transition from this state given the input InsnClass.
//
void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
void addTransition(
std::vector<unsigned> InsnClass, const State *To,
const std::vector<std::pair<unsigned, unsigned>> &TransitionInfo) const;
//
// hasTransition - Returns true if there is a transition from this state
@ -329,11 +342,12 @@ State::State() :
//
// addTransition - Add a transition from this state given the input InsnClass
//
void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
const {
void State::addTransition(
std::vector<unsigned> InsnClass, const State *To,
const std::vector<std::pair<unsigned, unsigned>> &TransitionInfo) const {
assert(!Transitions.count(InsnClass) &&
"Cannot have multiple transitions for the same input");
Transitions[InsnClass] = To;
Transitions[InsnClass] = {TransitionInfo, To};
}
//
@ -351,9 +365,11 @@ bool State::hasTransition(std::vector<unsigned> InsnClass) const {
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
//
void State::AddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates) const {
void State::AddInsnClass(
std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates,
std::vector<std::pair<unsigned, unsigned>> &TransitionInfo) const {
//
// Iterate over all resource states in currentState.
//
@ -362,25 +378,26 @@ void State::AddInsnClass(std::vector<unsigned> &InsnClass,
for (std::set<unsigned>::iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
unsigned thisState = *SI;
unsigned ThisState = *SI;
DenseSet<unsigned> VisitedResourceStates;
LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState)
LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(ThisState)
<< "\n");
AddInsnClassStages(InsnClass, ComboBitToBitsMap,
numstages - 1, numstages,
thisState, thisState,
VisitedResourceStates, PossibleStates);
AddInsnClassStages(InsnClass, ComboBitToBitsMap, numstages - 1, numstages,
ThisState, ThisState, VisitedResourceStates);
for (unsigned NewState : VisitedResourceStates) {
PossibleStates.insert(NewState);
TransitionInfo.emplace_back(ThisState, NewState);
}
}
}
void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
unsigned chkstage, unsigned numstages,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const {
void State::AddInsnClassStages(
std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap, unsigned chkstage,
unsigned numstages, unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates) const {
assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
unsigned thisStage = InsnClass[chkstage];
@ -438,7 +455,6 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
if (ResultingResourceState != prevState) {
if (VisitedResourceStates.count(ResultingResourceState) == 0) {
VisitedResourceStates.insert(ResultingResourceState);
PossibleStates.insert(ResultingResourceState);
LLVM_DEBUG(dbgs()
<< "\tResultingResourceState: 0x"
<< Twine::utohexstr(ResultingResourceState) << "\n");
@ -456,10 +472,9 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
//
if (ResultingResourceState != prevState) {
LLVM_DEBUG(dbgs() << "\n");
AddInsnClassStages(InsnClass, ComboBitToBitsMap,
chkstage - 1, numstages,
ResultingResourceState, origState,
VisitedResourceStates, PossibleStates);
AddInsnClassStages(InsnClass, ComboBitToBitsMap, chkstage - 1,
numstages, ResultingResourceState, origState,
VisitedResourceStates);
} else {
LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
}
@ -578,17 +593,10 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
II = SI->Transitions.begin(), IE = SI->Transitions.end();
II != IE; ++II) {
OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", "
<< II->second->stateNum << "},\t";
<< II->second.S->stateNum << "},\t";
}
ValidTransitions += SI->Transitions.size();
// If there are no valid transitions from this stage, we need a sentinel
// transition.
if (ValidTransitions == StateEntry[i]) {
OS << SentinelEntry << ",\t";
++ValidTransitions;
}
OS << " // state " << i << ": " << StateEntry[i];
if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition.
OS << "-" << (ValidTransitions-1);
@ -610,8 +618,6 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
OS << "// " << numStates << " states\n";
OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
// Multiply i by 2 since each entry in DFAStateInputTable is a set of
// two numbers.
unsigned lastState = 0;
for (unsigned i = 0; i < numStates; ++i) {
if (i && ((i % 10) == 0)) {
@ -620,11 +626,44 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
}
OS << StateEntry[i] << ", ";
}
// Print out the index to the sentinel entry in StateInputTable
OS << ValidTransitions << ", ";
OS << " // states " << (lastState+1) << ":" << numStates << "\n";
OS << "};\n";
// Generate the resource transition table.
OS << "const std::pair<unsigned, unsigned> " << TargetName
<< "DFAResourceTransitionTable[] = { \n";
int N = 0;
StateEntry.clear();
for (const State &S : states) {
for (auto &KV : S.Transitions) {
StateEntry.push_back(N);
for (std::pair<unsigned, unsigned> &T : KV.second.ResourceTransitions) {
OS << "{0x" << utohexstr(T.first) << ", 0x" << utohexstr(T.second)
<< "}, ";
++N;
}
}
OS << "\n ";
}
// Add a sentinel entry to terminate the search.
StateEntry.push_back(N);
OS << "\n {~0U,~0U}\n};\n\n";
OS << "// " << TargetName << "DFAResourceTransitionEntryTable[i] = "
<< "Index of the first entry in DFAResourceTransitionTable for\n";
OS << "// the ith transition.\n";
OS << "const unsigned int " << TargetName
<< "DFAResourceTransitionEntryTable[] = { \n";
N = 0;
for (int S : StateEntry) {
OS << S << ",";
if (N++ % 10 == 0)
OS << "\n ";
}
OS << "\n ~0U\n};\n";
}
//
@ -946,7 +985,9 @@ void DFAPacketizerEmitter::emitForItineraries(
if (!current->hasTransition(InsnClass) &&
current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
const State *NewState = nullptr;
current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
std::vector<std::pair<unsigned, unsigned>> TransitionInfo;
current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources,
TransitionInfo);
if (NewStateResources.empty()) {
LLVM_DEBUG(dbgs() << " Skipped - no new states generated\n");
continue;
@ -982,7 +1023,7 @@ void DFAPacketizerEmitter::emitForItineraries(
});
}
current->addTransition(InsnClass, NewState);
current->addTransition(InsnClass, NewState, TransitionInfo);
}
}
}
@ -1000,7 +1041,10 @@ void DFAPacketizerEmitter::emitForItineraries(
<< "DFAPacketizer(const InstrItineraryData *IID) const {\n"
<< " return new DFAPacketizer(IID, " << TargetName << DFAName
<< "DFAStateInputTable, " << TargetName << DFAName
<< "DFAStateEntryTable);\n}\n\n";
<< "DFAStateEntryTable, " << TargetName << DFAName
<< "DFAResourceTransitionTable, " << TargetName << DFAName
<< "DFAResourceTransitionEntryTable"
<< ");\n}\n\n";
}
namespace llvm {