1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

R600: Factorize code handling Const Read Port limitation

llvm-svn: 177078
This commit is contained in:
Vincent Lejeune 2013-03-14 15:50:45 +00:00
parent 984e7940a4
commit cd12dadb5c
6 changed files with 140 additions and 80 deletions

View File

@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
if (i == 2)
break;
SDValue CstOffset;
if (!Operand.getValueType().isVector() &&
SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
Ops[SelIdx[i] - 1] = CstOffset;
return true;
if (Operand.getValueType().isVector() ||
!SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
break;
// Gather the constant values already read by the other operands
std::vector<unsigned> Consts;
for (unsigned j = 0; j < 3; j++) {
int SrcIdx = OperandIdx[j];
if (SrcIdx < 0)
break;
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
Consts.push_back(Cst->getZExtValue());
}
}
}
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
Consts.push_back(Cst->getZExtValue());
if (!TII->fitsConstReadLimitations(Consts))
break;
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
Ops[SelIdx[i] - 1] = CstOffset;
return true;
}
break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;

View File

@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
/// \brief Check whether a set of constant selectors respects the const read
/// port limitation.
///
/// Every selector is reduced to the constant half it addresses: bit 0 is
/// dropped, so two selectors that differ only in bit 0 count as the same half.
///
/// \param Consts selectors of all the constants read by the instructions
///        under consideration (at most 12 entries).
/// \returns true if \p Consts refers to at most two distinct halves.
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
  // Occupancy is tracked with an explicit counter rather than by treating the
  // value 0 as "slot unused": a half whose masked selector is 0 is a real
  // read and has to take up one of the two slots like any other.
  unsigned Pairs[2] = {0, 0};
  unsigned NumPairs = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    // Same value as (Consts[i] & ~3) | (Consts[i] & 2).
    const unsigned ReadHalfConst = Consts[i] & ~1u;
    bool AlreadyRead = false;
    for (unsigned j = 0; j < NumPairs; ++j) {
      if (Pairs[j] == ReadHalfConst)
        AlreadyRead = true;
    }
    if (AlreadyRead)
      continue;
    // A third distinct half does not fit.
    if (NumPairs == 2)
      return false;
    Pairs[NumPairs++] = ReadHalfConst;
  }
  return true;
}
/// \brief Tell whether the instructions in \p MIs can share one group as far
/// as constant reads are concerned.
///
/// Collects the selector of every source operand that reads ALU_CONST across
/// all ALU instructions of \p MIs (non-ALU instructions are ignored) and
/// hands the list to fitsConstReadLimitations().
bool
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
  // Each row pairs a source operand with the operand holding its selector.
  const R600Operands::Ops SrcAndSel[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };

  std::vector<unsigned> ConstSels;
  for (std::vector<MachineInstr *>::const_iterator I = MIs.begin(),
       E = MIs.end(); I != E; ++I) {
    const MachineInstr *Instr = *I;
    const unsigned Opc = Instr->getOpcode();
    if (!isALUInstr(Opc))
      continue;

    for (unsigned Src = 0; Src != 3; ++Src) {
      const int RegOpIdx = getOperandIdx(Opc, SrcAndSel[Src][0]);
      // Stop at the first source operand this opcode does not have.
      if (RegOpIdx < 0)
        break;
      if (Instr->getOperand(RegOpIdx).getReg() != AMDGPU::ALU_CONST)
        continue;
      const unsigned Sel =
          Instr->getOperand(getOperandIdx(Opc, SrcAndSel[Src][1])).getImm();
      ConstSels.push_back(Sel);
    }
  }
  return fitsConstReadLimitations(ConstSels);
}
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();

View File

@ -53,6 +53,9 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
bool canBundle(const std::vector<MachineInstr *> &) const;
/// \brief Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;

View File

@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
/// Summary of the constants read by one instruction: the index of the first
/// non-zero constant read through channels 0/1 (XYPair) and of the first
/// non-zero one read through channels 2/3 (ZWPair). A value of 0 means
/// "nothing recorded".
class ConstPairs {
private:
  unsigned XYPair;
  unsigned ZWPair;
public:
  /// Build the summary from three selectors; the low two bits of a selector
  /// are the channel, the remaining bits the constant index.
  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
    for (unsigned Src = 0; Src != 3; ++Src) {
      const unsigned Channel = ReadConst[Src] & 3;
      const unsigned Index = ReadConst[Src] & ~3u;
      // Channels 0 and 1 feed the XY pair, channels 2 and 3 the ZW pair.
      unsigned &Pair = (Channel < 2) ? XYPair : ZWPair;
      // Only the first recorded index is kept.
      if (Pair == 0)
        Pair = Index;
    }
  }

  /// \returns true unless both summaries recorded a pair of the same kind
  /// with different indices.
  bool isCompatibleWith(const ConstPairs& CP) const {
    const bool XYClash = XYPair && CP.XYPair && CP.XYPair != XYPair;
    const bool ZWClash = ZWPair && CP.ZWPair && CP.ZWPair != ZWPair;
    return !XYClash && !ZWClash;
  }
};
/// Build the ConstPairs summary for \p MI. Selectors default to 0; for an ALU
/// instruction, every source operand that reads ALU_CONST contributes the
/// immediate of its matching *_SEL operand.
static
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
  unsigned Sels[3] = {0, 0, 0};
  const unsigned Opc = MI.getOpcode();
  if (TII->isALUInstr(Opc)) {
    // Each row pairs a source operand with the operand holding its selector.
    R600Operands::Ops SrcAndSel[3][2] = {
      {R600Operands::SRC0, R600Operands::SRC0_SEL},
      {R600Operands::SRC1, R600Operands::SRC1_SEL},
      {R600Operands::SRC2, R600Operands::SRC2_SEL},
    };
    for (unsigned Src = 0; Src != 3; ++Src) {
      const int RegOpIdx = TII->getOperandIdx(Opc, SrcAndSel[Src][0]);
      // Stop at the first source operand this opcode does not have.
      if (RegOpIdx < 0)
        break;
      if (MI.getOperand(RegOpIdx).getReg() != AMDGPU::ALU_CONST)
        continue;
      Sels[Src] = MI.getOperand(
          TII->getOperandIdx(Opc, SrcAndSel[Src][1])).getImm();
    }
  }
  return ConstPairs(Sels);
}
bool
R600SchedStrategy::isBundleable(const MachineInstr& MI) {
const ConstPairs &MIPair = getPairs(TII, MI);
for (unsigned i = 0; i < 4; i++) {
if (!InstructionsGroupCandidate[i])
continue;
const ConstPairs &IGPair = getPairs(TII,
*InstructionsGroupCandidate[i]->getInstr());
if (!IGPair.isCompatibleWith(MIPair))
return false;
}
return true;
}
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
if (Q.empty())
return NULL;
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
It != E; ++It) {
SUnit *SU = *It;
if (isBundleable(*SU->getInstr())) {
InstructionsGroupCandidate.push_back(SU->getInstr());
if (TII->canBundle(InstructionsGroupCandidate)) {
InstructionsGroupCandidate.pop_back();
Q.erase(It);
return SU;
} else {
InstructionsGroupCandidate.pop_back();
}
}
return NULL;
@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstructionsGroupCandidate.clear();
LoadAlu();
}
@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate[Chan] = SU;
InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
}

View File

@ -98,7 +98,7 @@ public:
virtual void releaseBottomNode(SUnit *SU);
private:
SUnit *InstructionsGroupCandidate[4];
std::vector<MachineInstr *> InstructionsGroupCandidate;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
@ -112,7 +112,6 @@ private:
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
SUnit* pickOther(int QID);
bool isBundleable(const MachineInstr& MI);
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
};

View File

@ -1,8 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @main1
; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
define void @main() {
define void @main1() {
main_body:
%0 = load <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
@ -48,5 +48,53 @@ main_body:
ret void
}
; CHECK: @main2
; CHECK-NOT: MOV
; Four compare-and-select groups, one per output lane. Each group compares one
; element of the vector loaded from the null address in address space 8 with
; 0.0, then selects between two elements of a second vector loaded from the
; same address space. The two selected-from elements always belong to the same
; half of that vector (elements 0/1 or elements 2/3). The four results are
; clamped to [0.0, 1.0], packed into a <4 x float> and stored with
; llvm.R600.store.swizzle.
define void @main2() {
main_body:
; Lane 0: condition from vector 0 element 0, values from vector 1 elements 0 and 1.
%0 = load <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
%2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 1
%6 = fcmp ult float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
; Lane 1: condition from vector 0 element 1, values from vector 2 elements 0 and 1.
%8 = load <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%11 = extractelement <4 x float> %10, i32 0
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ult float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
; Lane 2: condition from vector 0 element 2, values from vector 1 elements 3 and 2.
%16 = load <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
%18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 3
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ult float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
; Lane 3: condition from vector 0 element 3, values from vector 2 elements 3 and 2.
%24 = load <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
%26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%27 = extractelement <4 x float> %26, i32 3
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 2
%30 = fcmp ult float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
; Clamp every lane to [0.0, 1.0].
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
%34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
%35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
; Pack the four lanes into one vector and store it.
%36 = insertelement <4 x float> undef, float %32, i32 0
%37 = insertelement <4 x float> %36, float %33, i32 1
%38 = insertelement <4 x float> %37, float %34, i32 2
%39 = insertelement <4 x float> %38, float %35, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
ret void
}
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)