mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
AMDGPU: WQM cleanups
- Add new TII instruction checks
- Don't use const for blocks that are mutated.
- Checking isBranch and isTerminator should be redundant

llvm-svn: 275252
This commit is contained in:
parent
e9d18bfc80
commit
fafe4e3cc6
@ -350,6 +350,14 @@ public:
|
|||||||
return get(Opcode).TSFlags & SIInstrFlags::DPP;
|
return get(Opcode).TSFlags & SIInstrFlags::DPP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isScalarUnit(const MachineInstr &MI) {
|
||||||
|
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool usesVM_CNT(const MachineInstr &MI) {
|
||||||
|
return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
|
||||||
|
}
|
||||||
|
|
||||||
bool isVGPRCopy(const MachineInstr &MI) const {
|
bool isVGPRCopy(const MachineInstr &MI) const {
|
||||||
assert(MI.isCopy());
|
assert(MI.isCopy());
|
||||||
unsigned Dest = MI.getOperand(0).getReg();
|
unsigned Dest = MI.getOperand(0).getReg();
|
||||||
|
@ -53,13 +53,10 @@
|
|||||||
#include "AMDGPUSubtarget.h"
|
#include "AMDGPUSubtarget.h"
|
||||||
#include "SIInstrInfo.h"
|
#include "SIInstrInfo.h"
|
||||||
#include "SIMachineFunctionInfo.h"
|
#include "SIMachineFunctionInfo.h"
|
||||||
#include "llvm/CodeGen/MachineDominanceFrontier.h"
|
|
||||||
#include "llvm/CodeGen/MachineDominators.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/IR/Constants.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@ -84,12 +81,12 @@ struct BlockInfo {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct WorkItem {
|
struct WorkItem {
|
||||||
const MachineBasicBlock *MBB = nullptr;
|
MachineBasicBlock *MBB = nullptr;
|
||||||
const MachineInstr *MI = nullptr;
|
MachineInstr *MI = nullptr;
|
||||||
|
|
||||||
WorkItem() {}
|
WorkItem() {}
|
||||||
WorkItem(const MachineBasicBlock *MBB) : MBB(MBB) {}
|
WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
|
||||||
WorkItem(const MachineInstr *MI) : MI(MI) {}
|
WorkItem(MachineInstr *MI) : MI(MI) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
class SIWholeQuadMode : public MachineFunctionPass {
|
class SIWholeQuadMode : public MachineFunctionPass {
|
||||||
@ -99,13 +96,13 @@ private:
|
|||||||
MachineRegisterInfo *MRI;
|
MachineRegisterInfo *MRI;
|
||||||
|
|
||||||
DenseMap<const MachineInstr *, InstrInfo> Instructions;
|
DenseMap<const MachineInstr *, InstrInfo> Instructions;
|
||||||
DenseMap<const MachineBasicBlock *, BlockInfo> Blocks;
|
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
|
||||||
SmallVector<const MachineInstr *, 2> ExecExports;
|
SmallVector<const MachineInstr *, 2> ExecExports;
|
||||||
SmallVector<MachineInstr *, 1> LiveMaskQueries;
|
SmallVector<MachineInstr *, 1> LiveMaskQueries;
|
||||||
|
|
||||||
char scanInstructions(MachineFunction &MF, std::vector<WorkItem>& Worklist);
|
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
|
||||||
void propagateInstruction(const MachineInstr &MI, std::vector<WorkItem>& Worklist);
|
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
|
||||||
void propagateBlock(const MachineBasicBlock &MBB, std::vector<WorkItem>& Worklist);
|
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
|
||||||
char analyzeFunction(MachineFunction &MF);
|
char analyzeFunction(MachineFunction &MF);
|
||||||
|
|
||||||
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
|
||||||
@ -138,10 +135,8 @@ public:
|
|||||||
|
|
||||||
char SIWholeQuadMode::ID = 0;
|
char SIWholeQuadMode::ID = 0;
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE,
|
INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
|
||||||
"SI Whole Quad Mode", false, false)
|
"SI Whole Quad Mode", false, false)
|
||||||
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE,
|
|
||||||
"SI Whole Quad Mode", false, false)
|
|
||||||
|
|
||||||
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
|
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
|
||||||
|
|
||||||
@ -166,8 +161,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
|||||||
|
|
||||||
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
|
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
|
||||||
Flags = StateWQM;
|
Flags = StateWQM;
|
||||||
} else if (TII->get(Opcode).mayStore() &&
|
} else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
|
||||||
(MI.getDesc().TSFlags & SIInstrFlags::VM_CNT)) {
|
|
||||||
Flags = StateExact;
|
Flags = StateExact;
|
||||||
} else {
|
} else {
|
||||||
// Handle export instructions with the exec mask valid flag set
|
// Handle export instructions with the exec mask valid flag set
|
||||||
@ -214,16 +208,15 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
|||||||
return GlobalFlags;
|
return GlobalFlags;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
|
void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
|
||||||
std::vector<WorkItem>& Worklist) {
|
std::vector<WorkItem>& Worklist) {
|
||||||
const MachineBasicBlock &MBB = *MI.getParent();
|
MachineBasicBlock *MBB = MI.getParent();
|
||||||
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
|
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
|
||||||
BlockInfo &BI = Blocks[&MBB];
|
BlockInfo &BI = Blocks[MBB];
|
||||||
|
|
||||||
// Control flow-type instructions that are followed by WQM computations
|
// Control flow-type instructions that are followed by WQM computations
|
||||||
// must themselves be in WQM.
|
// must themselves be in WQM.
|
||||||
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) &&
|
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
|
||||||
(MI.isBranch() || MI.isTerminator())) {
|
|
||||||
Instructions[&MI].Needs = StateWQM;
|
Instructions[&MI].Needs = StateWQM;
|
||||||
II.Needs = StateWQM;
|
II.Needs = StateWQM;
|
||||||
}
|
}
|
||||||
@ -232,11 +225,11 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
|
|||||||
BI.Needs |= II.Needs;
|
BI.Needs |= II.Needs;
|
||||||
if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
|
if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
|
||||||
BI.InNeeds |= II.Needs;
|
BI.InNeeds |= II.Needs;
|
||||||
Worklist.push_back(&MBB);
|
Worklist.push_back(MBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Propagate backwards within block
|
// Propagate backwards within block
|
||||||
if (const MachineInstr *PrevMI = MI.getPrevNode()) {
|
if (MachineInstr *PrevMI = MI.getPrevNode()) {
|
||||||
char InNeeds = II.Needs | II.OutNeeds;
|
char InNeeds = II.Needs | II.OutNeeds;
|
||||||
if (!PrevMI->isPHI()) {
|
if (!PrevMI->isPHI()) {
|
||||||
InstrInfo &PrevII = Instructions[PrevMI];
|
InstrInfo &PrevII = Instructions[PrevMI];
|
||||||
@ -267,9 +260,8 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
|
|||||||
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
|
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for (const MachineOperand &Def : MRI->def_operands(Use.getReg())) {
|
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
|
||||||
const MachineInstr *DefMI = Def.getParent();
|
InstrInfo &DefII = Instructions[&DefMI];
|
||||||
InstrInfo &DefII = Instructions[DefMI];
|
|
||||||
|
|
||||||
// Obviously skip if DefMI is already flagged as NeedWQM.
|
// Obviously skip if DefMI is already flagged as NeedWQM.
|
||||||
//
|
//
|
||||||
@ -281,18 +273,18 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
DefII.Needs = StateWQM;
|
DefII.Needs = StateWQM;
|
||||||
Worklist.push_back(DefMI);
|
Worklist.push_back(&DefMI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
|
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
|
||||||
std::vector<WorkItem>& Worklist) {
|
std::vector<WorkItem>& Worklist) {
|
||||||
BlockInfo BI = Blocks[&MBB]; // take a copy to prevent dangling references
|
BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.
|
||||||
|
|
||||||
// Propagate through instructions
|
// Propagate through instructions
|
||||||
if (!MBB.empty()) {
|
if (!MBB.empty()) {
|
||||||
const MachineInstr *LastMI = &*MBB.rbegin();
|
MachineInstr *LastMI = &*MBB.rbegin();
|
||||||
InstrInfo &LastII = Instructions[LastMI];
|
InstrInfo &LastII = Instructions[LastMI];
|
||||||
if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
|
if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
|
||||||
LastII.OutNeeds |= BI.OutNeeds;
|
LastII.OutNeeds |= BI.OutNeeds;
|
||||||
@ -301,7 +293,7 @@ void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Predecessor blocks must provide for our WQM/Exact needs.
|
// Predecessor blocks must provide for our WQM/Exact needs.
|
||||||
for (const MachineBasicBlock *Pred : MBB.predecessors()) {
|
for (MachineBasicBlock *Pred : MBB.predecessors()) {
|
||||||
BlockInfo &PredBI = Blocks[Pred];
|
BlockInfo &PredBI = Blocks[Pred];
|
||||||
if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
|
if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
|
||||||
continue;
|
continue;
|
||||||
@ -311,9 +303,8 @@ void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
|
|||||||
Worklist.push_back(Pred);
|
Worklist.push_back(Pred);
|
||||||
}
|
}
|
||||||
|
|
||||||
// All successors must be prepared to accept the same set of WQM/Exact
|
// All successors must be prepared to accept the same set of WQM/Exact data.
|
||||||
// data.
|
for (MachineBasicBlock *Succ : MBB.successors()) {
|
||||||
for (const MachineBasicBlock *Succ : MBB.successors()) {
|
|
||||||
BlockInfo &SuccBI = Blocks[Succ];
|
BlockInfo &SuccBI = Blocks[Succ];
|
||||||
if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
|
if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
|
||||||
continue;
|
continue;
|
||||||
@ -394,8 +385,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
|||||||
++II;
|
++II;
|
||||||
|
|
||||||
// Skip instructions that are not affected by EXEC
|
// Skip instructions that are not affected by EXEC
|
||||||
if (MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD) &&
|
if (TII->isScalarUnit(MI) && !MI.isTerminator())
|
||||||
!MI.isBranch() && !MI.isTerminator())
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Generic instructions such as COPY will either disappear by register
|
// Generic instructions such as COPY will either disappear by register
|
||||||
@ -421,7 +411,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
|||||||
|
|
||||||
// Make sure to switch to Exact mode before the end of the block when
|
// Make sure to switch to Exact mode before the end of the block when
|
||||||
// Exact and only Exact is needed further downstream.
|
// Exact and only Exact is needed further downstream.
|
||||||
if (OutNeeds == StateExact && (MI.isBranch() || MI.isTerminator())) {
|
if (OutNeeds == StateExact && MI.isTerminator()) {
|
||||||
assert(Needs == 0);
|
assert(Needs == 0);
|
||||||
Needs = StateExact;
|
Needs = StateExact;
|
||||||
}
|
}
|
||||||
@ -456,7 +446,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
|
|||||||
|
|
||||||
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
|
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
|
||||||
for (MachineInstr *MI : LiveMaskQueries) {
|
for (MachineInstr *MI : LiveMaskQueries) {
|
||||||
DebugLoc DL = MI->getDebugLoc();
|
const DebugLoc &DL = MI->getDebugLoc();
|
||||||
unsigned Dest = MI->getOperand(0).getReg();
|
unsigned Dest = MI->getOperand(0).getReg();
|
||||||
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
|
BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
|
||||||
.addReg(LiveMaskReg);
|
.addReg(LiveMaskReg);
|
||||||
@ -512,9 +502,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
lowerLiveMaskQueries(LiveMaskReg);
|
lowerLiveMaskQueries(LiveMaskReg);
|
||||||
|
|
||||||
// Handle the general case
|
// Handle the general case
|
||||||
for (const auto &BII : Blocks)
|
for (auto BII : Blocks)
|
||||||
processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg,
|
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
|
||||||
BII.first == &*MF.begin());
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user