1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: WQM cleanups

- Add new TII instruction checks
- Don't use const for blocks that are mutated.
- Checking isBranch and isTerminator should be redundant

llvm-svn: 275252
This commit is contained in:
Matt Arsenault 2016-07-13 05:55:15 +00:00
parent e9d18bfc80
commit fafe4e3cc6
2 changed files with 39 additions and 42 deletions

View File

@ -350,6 +350,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP; return get(Opcode).TSFlags & SIInstrFlags::DPP;
} }
/// Returns true when the instruction descriptor of \p MI has the SALU or the
/// SMRD bit set in its target-specific flags.
static bool isScalarUnit(const MachineInstr &MI) {
  const auto ScalarMask = SIInstrFlags::SALU | SIInstrFlags::SMRD;
  return (MI.getDesc().TSFlags & ScalarMask) != 0;
}
/// Returns true when the instruction descriptor of \p MI has the VM_CNT bit
/// set in its target-specific flags.
static bool usesVM_CNT(const MachineInstr &MI) {
  const auto &Desc = MI.getDesc();
  return (Desc.TSFlags & SIInstrFlags::VM_CNT) != 0;
}
bool isVGPRCopy(const MachineInstr &MI) const { bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy()); assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg(); unsigned Dest = MI.getOperand(0).getReg();

View File

@ -53,13 +53,10 @@
#include "AMDGPUSubtarget.h" #include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h" #include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h" #include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
using namespace llvm; using namespace llvm;
@ -84,12 +81,12 @@ struct BlockInfo {
}; };
struct WorkItem { struct WorkItem {
const MachineBasicBlock *MBB = nullptr; MachineBasicBlock *MBB = nullptr;
const MachineInstr *MI = nullptr; MachineInstr *MI = nullptr;
WorkItem() {} WorkItem() {}
WorkItem(const MachineBasicBlock *MBB) : MBB(MBB) {} WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
WorkItem(const MachineInstr *MI) : MI(MI) {} WorkItem(MachineInstr *MI) : MI(MI) {}
}; };
class SIWholeQuadMode : public MachineFunctionPass { class SIWholeQuadMode : public MachineFunctionPass {
@ -99,13 +96,13 @@ private:
MachineRegisterInfo *MRI; MachineRegisterInfo *MRI;
DenseMap<const MachineInstr *, InstrInfo> Instructions; DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<const MachineBasicBlock *, BlockInfo> Blocks; DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<const MachineInstr *, 2> ExecExports; SmallVector<const MachineInstr *, 2> ExecExports;
SmallVector<MachineInstr *, 1> LiveMaskQueries; SmallVector<MachineInstr *, 1> LiveMaskQueries;
char scanInstructions(MachineFunction &MF, std::vector<WorkItem>& Worklist); char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(const MachineInstr &MI, std::vector<WorkItem>& Worklist); void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(const MachineBasicBlock &MBB, std::vector<WorkItem>& Worklist); void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF); char analyzeFunction(MachineFunction &MF);
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before, void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
@ -138,10 +135,8 @@ public:
char SIWholeQuadMode::ID = 0; char SIWholeQuadMode::ID = 0;
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
"SI Whole Quad Mode", false, false) "SI Whole Quad Mode", false, false)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE,
"SI Whole Quad Mode", false, false)
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID; char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
@ -166,8 +161,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) { if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
Flags = StateWQM; Flags = StateWQM;
} else if (TII->get(Opcode).mayStore() && } else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
(MI.getDesc().TSFlags & SIInstrFlags::VM_CNT)) {
Flags = StateExact; Flags = StateExact;
} else { } else {
// Handle export instructions with the exec mask valid flag set // Handle export instructions with the exec mask valid flag set
@ -214,16 +208,15 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
return GlobalFlags; return GlobalFlags;
} }
void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI, void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
std::vector<WorkItem>& Worklist) { std::vector<WorkItem>& Worklist) {
const MachineBasicBlock &MBB = *MI.getParent(); MachineBasicBlock *MBB = MI.getParent();
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[&MBB]; BlockInfo &BI = Blocks[MBB];
// Control flow-type instructions that are followed by WQM computations // Control flow-type instructions that are followed by WQM computations
// must themselves be in WQM. // must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
(MI.isBranch() || MI.isTerminator())) {
Instructions[&MI].Needs = StateWQM; Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM; II.Needs = StateWQM;
} }
@ -232,11 +225,11 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
BI.Needs |= II.Needs; BI.Needs |= II.Needs;
if ((BI.InNeeds | II.Needs) != BI.InNeeds) { if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
BI.InNeeds |= II.Needs; BI.InNeeds |= II.Needs;
Worklist.push_back(&MBB); Worklist.push_back(MBB);
} }
// Propagate backwards within block // Propagate backwards within block
if (const MachineInstr *PrevMI = MI.getPrevNode()) { if (MachineInstr *PrevMI = MI.getPrevNode()) {
char InNeeds = II.Needs | II.OutNeeds; char InNeeds = II.Needs | II.OutNeeds;
if (!PrevMI->isPHI()) { if (!PrevMI->isPHI()) {
InstrInfo &PrevII = Instructions[PrevMI]; InstrInfo &PrevII = Instructions[PrevMI];
@ -267,9 +260,8 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg())) if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
continue; continue;
for (const MachineOperand &Def : MRI->def_operands(Use.getReg())) { for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
const MachineInstr *DefMI = Def.getParent(); InstrInfo &DefII = Instructions[&DefMI];
InstrInfo &DefII = Instructions[DefMI];
// Obviously skip if DefMI is already flagged as NeedWQM. // Obviously skip if DefMI is already flagged as NeedWQM.
// //
@ -281,18 +273,18 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
continue; continue;
DefII.Needs = StateWQM; DefII.Needs = StateWQM;
Worklist.push_back(DefMI); Worklist.push_back(&DefMI);
} }
} }
} }
void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB, void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
std::vector<WorkItem>& Worklist) { std::vector<WorkItem>& Worklist) {
BlockInfo BI = Blocks[&MBB]; // take a copy to prevent dangling references BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.
// Propagate through instructions // Propagate through instructions
if (!MBB.empty()) { if (!MBB.empty()) {
const MachineInstr *LastMI = &*MBB.rbegin(); MachineInstr *LastMI = &*MBB.rbegin();
InstrInfo &LastII = Instructions[LastMI]; InstrInfo &LastII = Instructions[LastMI];
if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) { if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
LastII.OutNeeds |= BI.OutNeeds; LastII.OutNeeds |= BI.OutNeeds;
@ -301,7 +293,7 @@ void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
} }
// Predecessor blocks must provide for our WQM/Exact needs. // Predecessor blocks must provide for our WQM/Exact needs.
for (const MachineBasicBlock *Pred : MBB.predecessors()) { for (MachineBasicBlock *Pred : MBB.predecessors()) {
BlockInfo &PredBI = Blocks[Pred]; BlockInfo &PredBI = Blocks[Pred];
if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds) if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
continue; continue;
@ -311,9 +303,8 @@ void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
Worklist.push_back(Pred); Worklist.push_back(Pred);
} }
// All successors must be prepared to accept the same set of WQM/Exact // All successors must be prepared to accept the same set of WQM/Exact data.
// data. for (MachineBasicBlock *Succ : MBB.successors()) {
for (const MachineBasicBlock *Succ : MBB.successors()) {
BlockInfo &SuccBI = Blocks[Succ]; BlockInfo &SuccBI = Blocks[Succ];
if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds) if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
continue; continue;
@ -394,8 +385,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
++II; ++II;
// Skip instructions that are not affected by EXEC // Skip instructions that are not affected by EXEC
if (MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD) && if (TII->isScalarUnit(MI) && !MI.isTerminator())
!MI.isBranch() && !MI.isTerminator())
continue; continue;
// Generic instructions such as COPY will either disappear by register // Generic instructions such as COPY will either disappear by register
@ -421,7 +411,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
// Make sure to switch to Exact mode before the end of the block when // Make sure to switch to Exact mode before the end of the block when
// Exact and only Exact is needed further downstream. // Exact and only Exact is needed further downstream.
if (OutNeeds == StateExact && (MI.isBranch() || MI.isTerminator())) { if (OutNeeds == StateExact && MI.isTerminator()) {
assert(Needs == 0); assert(Needs == 0);
Needs = StateExact; Needs = StateExact;
} }
@ -456,7 +446,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
for (MachineInstr *MI : LiveMaskQueries) { for (MachineInstr *MI : LiveMaskQueries) {
DebugLoc DL = MI->getDebugLoc(); const DebugLoc &DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg(); unsigned Dest = MI->getOperand(0).getReg();
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest) BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
.addReg(LiveMaskReg); .addReg(LiveMaskReg);
@ -512,9 +502,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
lowerLiveMaskQueries(LiveMaskReg); lowerLiveMaskQueries(LiveMaskReg);
// Handle the general case // Handle the general case
for (const auto &BII : Blocks) for (auto BII : Blocks)
processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg, processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
BII.first == &*MF.begin());
return true; return true;
} }