mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
R600: Do not predicate basic blocks with multiple ALU clauses
Test is not included as it is several thousand lines long. To test this functionality, a test case must generate at least 2 ALU clauses, where an ALU clause is ~110 instructions long. NOTE: This is a candidate for the stable branch. llvm-svn: 185943
This commit is contained in:
parent
0c1224c533
commit
5517f57c42
@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
|
||||
}
|
||||
|
||||
/// Registers the passes this target adds at the "pre-sched2" hook.
///
/// For subtargets whose generation is NORTHERN_ISLANDS or older, the R600
/// clause-marker pass is added first; the generic if-converter is then added
/// for every generation.
///
/// \returns false unconditionally.
bool AMDGPUPassConfig::addPreSched2() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
// The clause markers are added before the if-converter, so they are already
// present in the blocks when if-conversion runs.
// NOTE(review): presumably this ordering is what lets the if-converter
// reject blocks holding more than one ALU clause -- confirm against
// R600InstrInfo::isPredicable.
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createR600EmitClauseMarkers(*TM));
|
||||
}
|
||||
addPass(&IfConverterID);
|
||||
return false;
|
||||
}
|
||||
@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createAMDGPUCFGPreparationPass(*TM));
|
||||
addPass(createAMDGPUCFGStructurizerPass(*TM));
|
||||
addPass(createR600EmitClauseMarkers(*TM));
|
||||
addPass(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(&FinalizeMachineBundlesID);
|
||||
addPass(createR600Packetizer(*TM));
|
||||
|
@ -256,6 +256,7 @@ private:
|
||||
ClauseContent.push_back(MILit);
|
||||
}
|
||||
}
|
||||
assert(ClauseContent.size() < 128 && "ALU clause is too big");
|
||||
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
|
||||
return ClauseFile(ClauseHead, ClauseContent);
|
||||
}
|
||||
@ -276,6 +277,7 @@ private:
|
||||
void
|
||||
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
|
||||
unsigned &CfCount) {
|
||||
Clause.first->getOperand(0).setImm(0);
|
||||
CounterPropagateAddr(Clause.first, CfCount);
|
||||
MachineBasicBlock *BB = Clause.first->getParent();
|
||||
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
|
||||
|
@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
const R600InstrInfo *TII;
|
||||
int Address;
|
||||
|
||||
unsigned OccupiedDwords(MachineInstr *MI) const {
|
||||
switch (MI->getOpcode()) {
|
||||
@ -159,7 +160,7 @@ private:
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
|
||||
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
|
||||
MachineBasicBlock::iterator ClauseHead = I;
|
||||
std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
|
||||
bool PushBeforeModifier = false;
|
||||
@ -199,20 +200,25 @@ private:
|
||||
unsigned Opcode = PushBeforeModifier ?
|
||||
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
|
||||
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
|
||||
.addImm(0) // ADDR
|
||||
// We don't use the ADDR field until R600ControlFlowFinalizer pass, where
|
||||
// it is safe to assume it is 0. However if we always put 0 here, the ifcvt
|
||||
// pass may assume that identical ALU clause starter at the beginning of a
|
||||
// true and false branch can be factorized which is not the case.
|
||||
.addImm(Address++) // ADDR
|
||||
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
|
||||
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
|
||||
.addImm(KCacheBanks.empty()?0:2) // KM0
|
||||
.addImm((KCacheBanks.size() < 2)?0:2) // KM1
|
||||
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
|
||||
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
|
||||
.addImm(AluInstCount); // COUNT
|
||||
.addImm(AluInstCount) // COUNT
|
||||
.addImm(1); // Enabled
|
||||
return I;
|
||||
}
|
||||
|
||||
public:
|
||||
R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
|
||||
TII(0) { }
|
||||
TII(0), Address(0) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
|
||||
|
@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
|
||||
};
|
||||
}
|
||||
|
||||
/// Finds the last ALU clause header in \p MBB.
///
/// Walks the block from its last instruction towards its first and stops at
/// the first instruction whose opcode is CF_ALU or CF_ALU_PUSH_BEFORE.
///
/// \param MBB the basic block to search.
/// \returns a forward iterator to that instruction, or MBB.end() when the
/// block contains neither opcode.
static
|
||||
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
|
||||
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
|
||||
It != E; ++It) {
|
||||
if (It->getOpcode() == AMDGPU::CF_ALU ||
|
||||
It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
|
||||
// A reverse iterator's base() designates the element *after* the one the
// reverse iterator refers to, so step back once to land on the match.
return llvm::prior(It.base());
|
||||
}
|
||||
// No ALU clause header in this block.
return MBB.end();
|
||||
}
|
||||
|
||||
unsigned
|
||||
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *TBB,
|
||||
@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
|
||||
.addMBB(TBB)
|
||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
||||
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
|
||||
if (CfAlu == MBB.end())
|
||||
return 1;
|
||||
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
|
||||
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||
.addMBB(TBB)
|
||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
|
||||
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
|
||||
if (CfAlu == MBB.end())
|
||||
return 2;
|
||||
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
|
||||
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
||||
clearFlag(predSet, 0, MO_FLAG_PUSH);
|
||||
I->eraseFromParent();
|
||||
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
|
||||
if (CfAlu == MBB.end())
|
||||
break;
|
||||
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
|
||||
CfAlu->setDesc(get(AMDGPU::CF_ALU));
|
||||
break;
|
||||
}
|
||||
case AMDGPU::JUMP:
|
||||
@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
||||
clearFlag(predSet, 0, MO_FLAG_PUSH);
|
||||
I->eraseFromParent();
|
||||
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
|
||||
if (CfAlu == MBB.end())
|
||||
break;
|
||||
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
|
||||
CfAlu->setDesc(get(AMDGPU::CF_ALU));
|
||||
break;
|
||||
}
|
||||
case AMDGPU::JUMP:
|
||||
@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
|
||||
|
||||
if (MI->getOpcode() == AMDGPU::KILLGT) {
|
||||
return false;
|
||||
} else if (MI->getOpcode() == AMDGPU::CF_ALU) {
|
||||
// If the clause start in the middle of MBB then the MBB has more
|
||||
// than a single clause, unable to predicate several clauses.
|
||||
if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
|
||||
return false;
|
||||
// TODO: We don't support KC merging atm
|
||||
if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
|
||||
return false;
|
||||
return true;
|
||||
} else if (isVector(*MI)) {
|
||||
return false;
|
||||
} else {
|
||||
@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
|
||||
const SmallVectorImpl<MachineOperand> &Pred) const {
|
||||
int PIdx = MI->findFirstPredOperandIdx();
|
||||
|
||||
if (MI->getOpcode() == AMDGPU::CF_ALU) {
|
||||
MI->getOperand(8).setImm(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (PIdx != -1) {
|
||||
MachineOperand &PMO = MI->getOperand(PIdx);
|
||||
PMO.setReg(Pred[2].getReg());
|
||||
|
@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
|
||||
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
|
||||
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
|
||||
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
|
||||
i32imm:$COUNT),
|
||||
i32imm:$COUNT, i32imm:$Enabled),
|
||||
!strconcat(OpName, " $COUNT, @$ADDR, "
|
||||
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
|
||||
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
|
||||
|
@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
|
||||
MachineBasicBlock::iterator End = MBB->end();
|
||||
MachineBasicBlock::iterator MI = MBB->begin();
|
||||
while (MI != End) {
|
||||
if (MI->isKill()) {
|
||||
if (MI->isKill() ||
|
||||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
|
||||
MachineBasicBlock::iterator DeleteMI = MI;
|
||||
++MI;
|
||||
MBB->erase(DeleteMI);
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: JUMP @3
|
||||
; CHECK: JUMP @7
|
||||
; CHECK: EXPORT
|
||||
; CHECK-NOT: EXPORT
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user