llvm-mirror/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
Justin Bogner 3803387e4a AMDGPU: Fix a use-after-free and a missing break
We're erasing MI here, but then immediately using it again inside the
`if`. This moves the erase after we're done using it.

Doing that reveals a second problem though - this case is missing a
break, so we fall through to the default and dereference MI again.
This is obviously a bug, though I don't know how to write a test that
triggers it - all we do in the error case is print some extra debug
output.

Both of these issues crash on lots of tests under ASAN with the
recycling allocator changes from PR26808 applied.

llvm-svn: 264442
2016-03-25 18:33:16 +00:00
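
For illustration, a minimal sketch of the bug shape and of the fix described above. This is not the actual diff; it is a condensed reconstruction of the RETURN case that appears later in the file, with the pre-fix ordering inferred from the commit message:

    // Broken shape (pre-fix): the instruction is erased and then read again.
    case AMDGPU::RETURN: {
      MI->eraseFromParent();                  // MI is freed here...
      if (CfCount % 2) {
        BuildMI(MBB, I, MBB.findDebugLoc(MI), // ...but still dereferenced here
                TII->get(AMDGPU::PAD));
        CfCount++;
      }
      // no break: control falls through to the default case, which calls
      // MI->getOpcode() on the freed instruction as well
    }

    // Fixed shape: finish every use of MI, erase it last, and end the case
    // with a break. This is the ordering visible in the RETURN case below.
    case AMDGPU::RETURN: {
      if (CfCount % 2) {
        BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
        CfCount++;
      }
      MI->eraseFromParent();
      break;
    }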


//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
//===----------------------------------------------------------------------===//
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "r600cf"
namespace {
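// CFStack models the R600 hardware control-flow stack while native CF
// instructions are emitted. Branches and loops consume either a full stack
// entry or a smaller sub-entry (four sub-entries fit in one full entry), and
// the maximum depth reached is recorded so the pass can report STACK_SIZE.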
struct CFStack {
enum StackItem {
ENTRY = 0,
SUB_ENTRY = 1,
FIRST_NON_WQM_PUSH = 2,
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
const AMDGPUSubtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
unsigned getLoopDepth();
bool branchStackContains(CFStack::StackItem);
bool requiresWorkAroundForInst(unsigned Opcode);
unsigned getSubEntrySize(CFStack::StackItem Item);
void updateMaxStackSize();
void pushBranch(unsigned Opcode, bool isWQM = false);
void pushLoop();
void popBranch();
void popLoop();
};
unsigned CFStack::getLoopDepth() {
return LoopStack.size();
}
bool CFStack::branchStackContains(CFStack::StackItem Item) {
for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
E = BranchStack.end(); I != E; ++I) {
if (*I == Item)
return true;
}
return false;
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
getLoopDepth() > 1)
return true;
if (!ST->hasCFAluBug())
return false;
switch(Opcode) {
default: return false;
case AMDGPU::CF_ALU_PUSH_BEFORE:
case AMDGPU::CF_ALU_ELSE_AFTER:
case AMDGPU::CF_ALU_BREAK:
case AMDGPU::CF_ALU_CONTINUE:
if (CurrentSubEntries == 0)
return false;
if (ST->getWavefrontSize() == 64) {
// We are being conservative here. We only require this work-around if
// CurrentSubEntries > 3 &&
// (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
//
// We have to be conservative, because we don't know for certain that
// our stack allocation algorithm for Evergreen/NI is correct. Applying this
// work-around when CurrentSubEntries > 3 allows us to over-allocate stack
// resources without any problems.
return CurrentSubEntries > 3;
} else {
assert(ST->getWavefrontSize() == 32);
// We are being conservative here. We only require the work-around if
// CurrentSubEntries > 7 &&
// (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
// See the comment on the wavefront size == 64 case for why we are
// being conservative.
return CurrentSubEntries > 7;
}
}
}
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
switch(Item) {
default:
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
assert(!ST->hasCaymanISA());
if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
} else {
// Some documentation says that this is not necessary on Evergreen,
// but experimentation has shown that we need to allocate 1 extra
// sub-entry for the first non-WQM push.
// +1 For the push operation.
// +1 Extra space required.
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
case CFStack::SUB_ENTRY:
return 1;
}
}
void CFStack::updateMaxStackSize() {
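// Four sub-entries share one full hardware stack entry, so round the
// sub-entry count up to a multiple of four before converting it to entries.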
unsigned CurrentStackSize =
CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
CFStack::StackItem Item = CFStack::ENTRY;
switch(Opcode) {
case AMDGPU::CF_PUSH_EG:
case AMDGPU::CF_ALU_PUSH_BEFORE:
if (!isWQM) {
if (!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
else
Item = CFStack::SUB_ENTRY;
} else
Item = CFStack::ENTRY;
break;
}
BranchStack.push_back(Item);
if (Item == CFStack::ENTRY)
CurrentEntries++;
else
CurrentSubEntries += getSubEntrySize(Item);
updateMaxStackSize();
}
void CFStack::pushLoop() {
LoopStack.push_back(CFStack::ENTRY);
CurrentEntries++;
updateMaxStackSize();
}
void CFStack::popBranch() {
CFStack::StackItem Top = BranchStack.back();
if (Top == CFStack::ENTRY)
CurrentEntries--;
else
CurrentSubEntries-= getSubEntrySize(Top);
BranchStack.pop_back();
}
void CFStack::popLoop() {
CurrentEntries--;
LoopStack.pop_back();
}
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
enum ControlFlowInstruction {
CF_TC,
CF_VC,
CF_CALL_FS,
CF_WHILE_LOOP,
CF_END_LOOP,
CF_LOOP_BREAK,
CF_LOOP_CONTINUE,
CF_JUMP,
CF_ELSE,
CF_POP,
CF_END
};
static char ID;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
const AMDGPUSubtarget *ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
return true;
default:
return false;
}
}
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
break;
case CF_VC:
Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
break;
case CF_CALL_FS:
Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
break;
case CF_WHILE_LOOP:
Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
break;
case CF_END_LOOP:
Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
break;
case CF_LOOP_BREAK:
Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
break;
case CF_LOOP_CONTINUE:
Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
break;
case CF_JUMP:
Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
break;
case CF_ELSE:
Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
break;
case CF_POP:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
break;
case CF_END:
if (ST->hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
break;
}
Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
break;
}
assert (Opcode && "No opcode selected");
return TII->get(Opcode);
}
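// Returns true if MI can join the fetch clause currently being built: the
// 128-bit super-register MI reads must not have been written by an earlier
// instruction of the clause. On success the super-register MI writes is added
// to DstRegs so later candidates are checked against it.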
bool isCompatibleWithClause(const MachineInstr *MI,
std::set<unsigned> &DstRegs) const {
unsigned DstMI, SrcMI;
for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
E = MI->operands_end(); I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
if (MO.isDef()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
}
if (MO.isUse()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
}
}
if ((DstRegs.find(SrcMI) == DstRegs.end())) {
DstRegs.insert(DstMI);
return true;
} else
return false;
}
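// Greedily packs consecutive texture or vertex fetch instructions, starting
// at I, into a single clause of at most MaxFetchInst instructions, and
// prepends a CF_TC / CF_VC clause header carrying the instruction count.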
ClauseFile
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
bool IsTex = TII->usesTextureCache(ClauseHead);
std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
if (AluInstCount >= MaxFetchInst)
break;
if ((IsTex && !TII->usesTextureCache(I)) ||
(!IsTex && !TII->usesVertexCache(I)))
break;
if (!isCompatibleWithClause(I, DstRegs))
break;
AluInstCount ++;
ClauseContent.push_back(I);
}
MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(0) // ADDR
.addImm(AluInstCount - 1); // COUNT
return ClauseFile(MIb, std::move(ClauseContent));
}
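// Rewrites every ALU_LITERAL_X source of MI to one of the four literal slots
// (X/Y/Z/W): a slot is reused when the same immediate already appears in
// Lits, otherwise the immediate is appended to Lits and gets the next slot.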
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
};
const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
TII->getSrcs(MI);
for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
int64_t Imm = Srcs[i].second;
std::vector<int64_t>::iterator It =
std::find(Lits.begin(), Lits.end(), Imm);
if (It != Lits.end()) {
unsigned Index = It - Lits.begin();
Srcs[i].first->setReg(LiteralRegs[Index]);
} else {
assert(Lits.size() < 4 && "Too many literals in Instruction Group");
Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
Lits.push_back(Imm);
}
}
}
MachineBasicBlock::iterator insertLiterals(
MachineBasicBlock::iterator InsertPos,
const std::vector<unsigned> &Literals) const {
MachineBasicBlock *MBB = InsertPos->getParent();
for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
unsigned LiteralPair0 = Literals[i];
unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
TII->get(AMDGPU::LITERALS))
.addImm(LiteralPair0)
.addImm(LiteralPair1);
}
return InsertPos;
}
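// Collects the ALU instructions following the clause header at I into a
// clause: bundles are unbundled, literal operands are assigned to literal
// slots, and a LITERALS pseudo instruction is emitted for each pair of
// literals used. The header's count operand (operand 7) is set at the end.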
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
I++;
for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
if (IsTrivialInst(I)) {
++I;
continue;
}
if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
break;
std::vector<int64_t> Literals;
if (I->isBundle()) {
MachineInstr *DeleteMI = I;
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
while (++BI != E && BI->isBundledWithPred()) {
BI->unbundleFromPred();
for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = BI->getOperand(i);
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
getLiteral(&*BI, Literals);
ClauseContent.push_back(&*BI);
}
I = BI;
DeleteMI->eraseFromParent();
} else {
getLiteral(I, Literals);
ClauseContent.push_back(I);
I++;
}
for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
unsigned literal0 = Literals[i];
unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
TII->get(AMDGPU::LITERALS))
.addImm(literal0)
.addImm(literal2);
ClauseContent.push_back(MILit);
}
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, std::move(ClauseContent));
}
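// EmitFetchClause / EmitALUClause patch the clause header's address operand
// with the current CF instruction count, mark the clause with a
// FETCH_CLAUSE / ALU_CLAUSE pseudo, and splice the clause body to InsertPos.
// Fetch instructions advance CfCount by two words each, ALU ones by one.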
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
.addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
CfCount += 2 * Clause.second.size();
}
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
.addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
CfCount += Clause.second.size();
}
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
unsigned Addr) const {
for (MachineInstr *MI : MIs) {
CounterPropagateAddr(MI, Addr);
}
}
public:
R600ControlFlowFinalizer(TargetMachine &tm)
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
ST = &MF.getSubtarget<AMDGPUSubtarget>();
MaxFetchInst = ST->getTexVTXClauseSize();
TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MFI->getShaderType());
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
if (MFI->getShaderType() == ShaderType::VERTEX) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
getHWInstrDesc(CF_CALL_FS));
CfCount++;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
std::vector<MachineInstr *> LastAlu(1);
std::vector<MachineInstr *> ToPopAfter;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
LastAlu.back() = nullptr;
continue;
}
MachineBasicBlock::iterator MI = I;
if (MI->getOpcode() != AMDGPU::ENDIF)
LastAlu.back() = nullptr;
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
bool RequiresWorkAround =
CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
if (RequiresWorkAround) {
DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
MI->setDesc(TII->get(AMDGPU::CF_ALU));
CfCount++;
CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
} else
CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
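// Intentional fallthrough: whether or not the work-around rewrote the
// instruction to CF_ALU, it still heads the ALU clause built below.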
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
CFStack.pushLoop();
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(1);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
LoopStack.push_back(std::move(Pair));
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *> > Pair =
std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::IF_PREDICATE_SET: {
LastAlu.push_back(nullptr);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::ENDIF: {
CFStack.popBranch();
if (LastAlu.back()) {
ToPopAfter.push_back(LastAlu.back());
} else {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
(void)MIb;
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CfCount++;
}
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount);
IfOrElseInst->getOperand(1).setImm(1);
LastAlu.pop_back();
MI->eraseFromParent();
break;
}
case AMDGPU::BREAK: {
CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
break;
}
case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::RETURN: {
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
CfCount++;
if (CfCount % 2) {
BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
CfCount++;
}
MI->eraseFromParent();
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
EmitFetchClause(I, FetchClauses[i], CfCount);
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
EmitALUClause(I, AluClauses[i], CfCount);
break;
}
default:
if (TII->isExport(MI->getOpcode())) {
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
}
break;
}
}
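// Each CF_ALU that was immediately followed by an ENDIF is now rewritten as
// CF_ALU_POP_AFTER, folding the stack pop into the ALU clause instead of
// emitting a separate POP instruction.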
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
MachineInstr *Alu = ToPopAfter[i];
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
TII->get(AMDGPU::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
.addImm(Alu->getOperand(1).getImm())
.addImm(Alu->getOperand(2).getImm())
.addImm(Alu->getOperand(3).getImm())
.addImm(Alu->getOperand(4).getImm())
.addImm(Alu->getOperand(5).getImm())
.addImm(Alu->getOperand(6).getImm())
.addImm(Alu->getOperand(7).getImm())
.addImm(Alu->getOperand(8).getImm());
Alu->eraseFromParent();
}
MFI->StackSize = CFStack.MaxStackSize;
}
return false;
}
const char *getPassName() const override {
return "R600 Control Flow Finalizer Pass";
}
};
char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}