
R600: initial scheduler code

This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances that ALU instructions are packed together in a single VLIW group).
It also tries to reduce clause switching by grouping instructions of the
same kind (ALU/FETCH/CF) together.
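
For illustration, grouping same-kind instructions pays off by reducing the
number of kind transitions in the final schedule, since each transition
costs a clause switch on the hardware. A minimal standalone sketch of the
idea (hypothetical example, not part of this patch):

#include <cstdio>

enum Kind { ALU, FETCH, CF };

// Every change of kind between consecutive instructions costs one clause
// switch, so same-kind runs minimize switching overhead.
static int countClauseSwitches(const Kind *Seq, int N) {
  int Switches = 0;
  for (int i = 1; i < N; ++i)
    if (Seq[i] != Seq[i - 1])
      ++Switches;
  return Switches;
}

int main() {
  const Kind Interleaved[] = {ALU, FETCH, ALU, FETCH, CF, ALU};
  const Kind Grouped[] = {ALU, ALU, ALU, FETCH, FETCH, CF};
  std::printf("interleaved: %d switches\n", countClauseSwitches(Interleaved, 6)); // 5
  std::printf("grouped: %d switches\n", countClauseSwitches(Grouped, 6)); // 2
  return 0;
}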

Vincent Lejeune:
 - Support for VLIW4 slot assignment
 - Recomputation of ScheduleDAG to get more parallelism opportunities

Tom Stellard:
 - Fix assertion failure when trying to determine an instruction's slot
   based on its destination register's class
 - Fix some compiler warnings

Vincent Lejeune: [v2]
 - Remove recomputation of ScheduleDAG (will be provided in a later patch)
 - Improve the estimation of ALU clause size so that the heuristic does not
   emit CF instructions at the wrong position.
 - Make the scheduling heuristic smarter using SUnit depth
 - Take constant read limitations into account

Vincent Lejeune: [v3]
 - Fix some uninitialized values in ConstPairs
 - Add asserts to ensure an ALU slot is always populated

llvm-svn: 176498
Vincent Lejeune 2013-03-05 18:41:32 +00:00
parent 7475aaf23e
commit 0db35fba3a
3 changed files with 624 additions and 1 deletion

@@ -17,6 +17,7 @@
 #include "AMDGPU.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
   RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
 }
 
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+                    createR600MachineScheduler);
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
                                          StringRef CPU, StringRef FS,
                                          TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
   AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
+    : TargetPassConfig(TM, PM) {
+    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+      enablePass(&MachineSchedulerID);
+      MachineSchedRegistry::setDefault(createR600MachineScheduler);
+    }
+  }
 
   AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
     return getTM<AMDGPUTargetMachine>();
@@ -0,0 +1,487 @@
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "misched"
#include "R600MachineScheduler.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include <set>
#include <iostream>
using namespace llvm;
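// This strategy is driven top-down by the generic MachineScheduler:
// pickNode() always reports IsTopNode, and releaseBottomNode() is a no-op.
// Ready instructions are bucketed by kind (ALU/fetch/other) and emitted in
// same-kind runs to limit clause switching; ALU candidates are further
// bucketed per VLIW slot (X/Y/Z/W) when loaded into AvailableAlus.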
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
DAG = dag;
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
MRI = &DAG->MRI;
Available[IDAlu]->clear();
Available[IDFetch]->clear();
Available[IDOther]->clear();
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin
const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
} else {
InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
}
}
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
if (QSrc->empty())
return;
for (ReadyQueue::iterator I = QSrc->begin(),
E = QSrc->end(); I != E; ++I) {
(*I)->NodeQueueId &= ~QSrc->getID();
QDst->push(*I);
}
QSrc->clear();
}
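// Clause heuristic: keep emitting the current instruction kind until its
// budget (InstKindLimit) is exhausted or its queue runs dry, and only leave
// an ALU clause when fetch or other work is actually available.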
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
SUnit *SU = 0;
IsTopNode = true;
NextInstKind = IDOther;
// check if we might want to switch current clause type
bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
(CurEmitted > InstKindLimit[CurInstKind]) ||
(Available[CurInstKind]->empty());
bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
(!Available[IDFetch]->empty() || !Available[IDOther]->empty());
if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
(!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
// try to pick ALU
SU = pickAlu();
if (SU) {
if (CurEmitted > InstKindLimit[IDAlu])
CurEmitted = 0;
NextInstKind = IDAlu;
}
}
if (!SU) {
// try to pick FETCH
SU = pickOther(IDFetch);
if (SU)
NextInstKind = IDFetch;
}
// try to pick other
if (!SU) {
SU = pickOther(IDOther);
if (SU)
NextInstKind = IDOther;
}
DEBUG(
if (SU) {
dbgs() << "picked node: ";
SU->dump(DAG);
} else {
dbgs() << "NO NODE ";
for (int i = 0; i < IDLast; ++i) {
Available[i]->dump();
Pending[i]->dump();
}
for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
const SUnit &S = DAG->SUnits[i];
if (!S.isScheduled)
S.dump(DAG);
}
}
);
return SU;
}
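// Clause-size accounting: an instruction occupying a whole VLIW group
// counts as four slots, and each ALU_LITERAL_X operand consumes one extra
// literal slot in the clause.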
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
DEBUG(dbgs() << "scheduled: ");
DEBUG(SU->dump(DAG));
if (NextInstKind != CurInstKind) {
DEBUG(dbgs() << "Instruction Type Switch\n");
if (NextInstKind != IDAlu)
OccupedSlotsMask = 15;
CurEmitted = 0;
CurInstKind = NextInstKind;
}
if (CurInstKind == IDAlu) {
switch (getAluKind(SU)) {
case AluT_XYZW:
CurEmitted += 4;
break;
case AluDiscarded:
break;
default: {
++CurEmitted;
for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
E = SU->getInstr()->operands_end(); It != E; ++It) {
MachineOperand &MO = *It;
if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
++CurEmitted;
}
}
}
} else {
++CurEmitted;
}
DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
if (CurInstKind != IDFetch) {
MoveUnits(Pending[IDFetch], Available[IDFetch]);
}
MoveUnits(Pending[IDOther], Available[IDOther]);
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
int IK = getInstKind(SU);
DEBUG(dbgs() << IK << " <= ");
DEBUG(SU->dump(DAG));
Pending[IK]->push(SU);
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
const TargetRegisterClass *RC) const {
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
return RC->contains(Reg);
} else {
return MRI->getRegClass(Reg) == RC;
}
}
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
MachineInstr *MI = SU->getInstr();
switch (MI->getOpcode()) {
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
return AluT_XYZW;
case AMDGPU::COPY:
if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
// %vregX = COPY Tn_X is likely to be discarded in favor of an
// assignment of Tn_X to %vregX, so don't consider it when scheduling
return AluDiscarded;
}
else if (MI->getOperand(1).isUndef()) {
// MI will become a KILL, so don't consider it when scheduling
return AluDiscarded;
}
default:
break;
}
// Does the instruction take a whole instruction group (IG)?
if (TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
TII->isReductionOp(MI->getOpcode()))
return AluT_XYZW;
// Is the result already assigned to a channel?
unsigned DestSubReg = MI->getOperand(0).getSubReg();
switch (DestSubReg) {
case AMDGPU::sub0:
return AluT_X;
case AMDGPU::sub1:
return AluT_Y;
case AMDGPU::sub2:
return AluT_Z;
case AMDGPU::sub3:
return AluT_W;
default:
break;
}
// Is the result already a member of an X/Y/Z/W class?
unsigned DestReg = MI->getOperand(0).getReg();
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
return AluT_X;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
return AluT_Y;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
return AluT_Z;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
return AluT_W;
if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
return AluT_XYZW;
return AluAny;
}
int R600SchedStrategy::getInstKind(SUnit* SU) {
int Opcode = SU->getInstr()->getOpcode();
if (TII->isALUInstr(Opcode)) {
return IDAlu;
}
switch (Opcode) {
case AMDGPU::COPY:
case AMDGPU::CONST_COPY:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::DOT4_eg_pseudo:
case AMDGPU::DOT4_r600_pseudo:
return IDAlu;
case AMDGPU::TEX_VTX_CONSTBUF:
case AMDGPU::TEX_VTX_TEXBUF:
case AMDGPU::TEX_LD:
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
case AMDGPU::TEX_GET_GRADIENTS_H:
case AMDGPU::TEX_GET_GRADIENTS_V:
case AMDGPU::TEX_SET_GRADIENTS_H:
case AMDGPU::TEX_SET_GRADIENTS_V:
case AMDGPU::TEX_SAMPLE:
case AMDGPU::TEX_SAMPLE_C:
case AMDGPU::TEX_SAMPLE_L:
case AMDGPU::TEX_SAMPLE_C_L:
case AMDGPU::TEX_SAMPLE_LB:
case AMDGPU::TEX_SAMPLE_C_LB:
case AMDGPU::TEX_SAMPLE_G:
case AMDGPU::TEX_SAMPLE_C_G:
case AMDGPU::TXD:
case AMDGPU::TXD_SHADOW:
return IDFetch;
default:
DEBUG(
dbgs() << "other inst: ";
SU->dump(DAG);
);
return IDOther;
}
}
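// Constant-read limitation: each ALU_CONST read carries a selector whose
// low two bits give the channel and whose remaining bits give a constant
// pair index. Within one VLIW group the X/Y channels must share a single
// pair index, and likewise Z/W, so two instructions may only be bundled
// when their pair indices are compatible.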
class ConstPairs {
private:
unsigned XYPair;
unsigned ZWPair;
public:
ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
for (unsigned i = 0; i < 3; i++) {
unsigned ReadConstChan = ReadConst[i] & 3;
unsigned ReadConstIndex = ReadConst[i] & (~3);
if (ReadConstChan < 2) {
if (!XYPair) {
XYPair = ReadConstIndex;
}
} else {
if (!ZWPair) {
ZWPair = ReadConstIndex;
}
}
}
}
bool isCompatibleWith(const ConstPairs& CP) const {
return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
(!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
}
};
static
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
unsigned ReadConsts[3] = {0, 0, 0};
R600Operands::Ops OpTable[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
{R600Operands::SRC2, R600Operands::SRC2_SEL},
};
if (!TII->isALUInstr(MI.getOpcode()))
return ConstPairs(ReadConsts);
for (unsigned i = 0; i < 3; i++) {
int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
if (SrcIdx < 0)
break;
if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
ReadConsts[i] = MI.getOperand(
TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
}
return ConstPairs(ReadConsts);
}
bool
R600SchedStrategy::isBundleable(const MachineInstr& MI) {
const ConstPairs &MIPair = getPairs(TII, MI);
for (unsigned i = 0; i < 4; i++) {
if (!InstructionsGroupCandidate[i])
continue;
const ConstPairs &IGPair = getPairs(TII,
*InstructionsGroupCandidate[i]->getInstr());
if (!IGPair.isCompatibleWith(MIPair))
return false;
}
return true;
}
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
if (Q.empty())
return NULL;
for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
E = Q.end(); It != E; ++It) {
SUnit *SU = *It;
if (isBundleable(*SU->getInstr())) {
Q.erase(It);
return SU;
}
}
return NULL;
}
void R600SchedStrategy::LoadAlu() {
ReadyQueue *QSrc = Pending[IDAlu];
for (ReadyQueue::iterator I = QSrc->begin(),
E = QSrc->end(); I != E; ++I) {
(*I)->NodeQueueId &= ~QSrc->getID();
AluKind AK = getAluKind(*I);
AvailableAlus[AK].insert(*I);
}
QSrc->clear();
}
void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert(OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
LoadAlu();
}
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
unsigned DestReg = MI->getOperand(0).getReg();
// The register pressure tracker crashes if an operand is both defined and
// used in the same instruction and we try to constrain its regclass
for (MachineInstr::mop_iterator It = MI->operands_begin(),
E = MI->operands_end(); It != E; ++It) {
MachineOperand &MO = *It;
if (MO.isReg() && !MO.isDef() &&
MO.getReg() == MI->getOperand(0).getReg())
return;
}
// Constrain the regclass of DestReg so that it is assigned to Slot
switch (Slot) {
case 0:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
break;
case 1:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
break;
case 2:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
break;
case 3:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
break;
}
}
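// Prefer an instruction already constrained to this slot; otherwise take a
// slot-agnostic one and pin it with AssignSlot(). If both exist, keep the
// deeper SUnit (per CompareSUnit) and return the other to its queue.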
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
if (!UnslotedSU) {
return SlotedSU;
} else if (!SlotedSU) {
AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU;
} else {
// Both are available: pick the one that compares lower (the deeper SUnit)
if (CompareSUnit()(SlotedSU, UnslotedSU)) {
AvailableAlus[AluAny].insert(UnslotedSU);
return SlotedSU;
} else {
AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU;
}
}
}
bool R600SchedStrategy::isAvailablesAluEmpty() const {
return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}
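// Fill the current VLIW group channel by channel. When the group is empty,
// whole-group instructions (soon-to-be-discarded copies and XYZW ops) are
// issued first, since they occupy all four slots at once.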
SUnit* R600SchedStrategy::pickAlu() {
while (!isAvailablesAluEmpty()) {
if (!OccupedSlotsMask) {
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask = 15;
return PopInst(AvailableAlus[AluDiscarded]);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask = 15;
return PopInst(AvailableAlus[AluT_XYZW]);
}
}
for (unsigned Chan = 0; Chan < 4; ++Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) {
SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate[Chan] = SU;
return SU;
}
}
}
PrepareNextSlot();
}
return NULL;
}
SUnit* R600SchedStrategy::pickOther(int QID) {
SUnit *SU = 0;
ReadyQueue *AQ = Available[QID];
if (AQ->empty()) {
MoveUnits(Pending[QID], AQ);
}
if (!AQ->empty()) {
SU = *AQ->begin();
AQ->remove(AQ->begin());
}
return SU;
}

@@ -0,0 +1,121 @@
//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
//
//===----------------------------------------------------------------------===//
#ifndef R600MACHINESCHEDULER_H_
#define R600MACHINESCHEDULER_H_
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/PriorityQueue.h"
using namespace llvm;
namespace llvm {
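// Orders ready ALU instructions by decreasing DAG depth, so nodes on long
// dependence chains are issued first.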
class CompareSUnit {
public:
bool operator()(const SUnit *S1, const SUnit *S2) {
return S1->getDepth() > S2->getDepth();
}
};
class R600SchedStrategy : public MachineSchedStrategy {
const ScheduleDAGMI *DAG;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
MachineRegisterInfo *MRI;
enum InstQueue {
QAlu = 1,
QFetch = 2,
QOther = 4
};
enum InstKind {
IDAlu,
IDFetch,
IDOther,
IDLast
};
enum AluKind {
AluAny,
AluT_X,
AluT_Y,
AluT_Z,
AluT_W,
AluT_XYZW,
AluDiscarded, // LLVM Instructions that are going to be eliminated
AluLast
};
ReadyQueue *Available[IDLast], *Pending[IDLast];
std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
InstKind CurInstKind;
int CurEmitted;
InstKind NextInstKind;
int InstKindLimit[IDLast];
int OccupedSlotsMask;
public:
R600SchedStrategy() :
DAG(0), TII(0), TRI(0), MRI(0) {
Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
Available[IDOther] = new ReadyQueue(QOther, "AOther");
Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
}
virtual ~R600SchedStrategy() {
for (unsigned I = 0; I < IDLast; ++I) {
delete Available[I];
delete Pending[I];
}
}
virtual void initialize(ScheduleDAGMI *dag);
virtual SUnit *pickNode(bool &IsTopNode);
virtual void schedNode(SUnit *SU, bool IsTopNode);
virtual void releaseTopNode(SUnit *SU);
virtual void releaseBottomNode(SUnit *SU);
private:
SUnit *InstructionsGroupCandidate[4];
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
bool isAvailablesAluEmpty() const;
SUnit *AttemptFillSlot(unsigned Slot);
void PrepareNextSlot();
SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
SUnit* pickOther(int QID);
bool isBundleable(const MachineInstr& MI);
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
};
} // namespace llvm
#endif /* R600MACHINESCHEDULER_H_ */