mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU/SI: Implement a work-around for smrd corrupting vccz bit
Summary: We will hit this once we have enabled uniform branches. The smrd-vccz-bug.ll test will be added with the uniform branch commit. Reviewers: mareko, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D16725 llvm-svn: 260137
This commit is contained in:
parent
d934f6f749
commit
bb35f34026
@ -88,6 +88,9 @@ private:
|
|||||||
/// \brief Whether the machine function returns void
|
/// \brief Whether the machine function returns void
|
||||||
bool ReturnsVoid;
|
bool ReturnsVoid;
|
||||||
|
|
||||||
|
/// Whether the VCCZ bit is possibly corrupt
|
||||||
|
bool VCCZCorrupt;
|
||||||
|
|
||||||
/// \brief Get increment/decrement amount for this instruction.
|
/// \brief Get increment/decrement amount for this instruction.
|
||||||
Counters getHwCounts(MachineInstr &MI);
|
Counters getHwCounts(MachineInstr &MI);
|
||||||
|
|
||||||
@ -116,6 +119,10 @@ private:
|
|||||||
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
|
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
|
||||||
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
||||||
|
|
||||||
|
/// Return true if there are LGKM instructions that haven't been waited on
|
||||||
|
/// yet.
|
||||||
|
bool hasOutstandingLGKM() const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
|
|
||||||
@ -123,7 +130,8 @@ public:
|
|||||||
MachineFunctionPass(ID),
|
MachineFunctionPass(ID),
|
||||||
TII(nullptr),
|
TII(nullptr),
|
||||||
TRI(nullptr),
|
TRI(nullptr),
|
||||||
ExpInstrTypesSeen(0) { }
|
ExpInstrTypesSeen(0),
|
||||||
|
VCCZCorrupt(false) { }
|
||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
|
|
||||||
@ -155,6 +163,13 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
|
|||||||
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
|
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
|
||||||
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
|
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
|
||||||
|
|
||||||
|
static bool readsVCCZ(unsigned Opcode) {
|
||||||
|
return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCNZ;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SIInsertWaits::hasOutstandingLGKM() const {
|
||||||
|
return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
|
||||||
|
}
|
||||||
|
|
||||||
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
|
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
|
||||||
uint64_t TSFlags = MI.getDesc().TSFlags;
|
uint64_t TSFlags = MI.getDesc().TSFlags;
|
||||||
@ -475,6 +490,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
TRI =
|
TRI =
|
||||||
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
|
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
|
||||||
|
|
||||||
|
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||||
MRI = &MF.getRegInfo();
|
MRI = &MF.getRegInfo();
|
||||||
|
|
||||||
WaitedOn = ZeroCounts;
|
WaitedOn = ZeroCounts;
|
||||||
@ -493,6 +509,44 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
|
|
||||||
|
if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
|
||||||
|
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
|
||||||
|
// vccz bit, so when we detect that an instruction may read from a
|
||||||
|
// corrupt vccz bit, we need to:
|
||||||
|
// 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
|
||||||
|
// complete.
|
||||||
|
// 2. Restore the correct value of vccz by writing the current value
|
||||||
|
// of vcc back to vcc.
|
||||||
|
|
||||||
|
if (TII->isSMRD(I->getOpcode())) {
|
||||||
|
VCCZCorrupt = true;
|
||||||
|
} else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
|
||||||
|
// FIXME: We only care about SMRD instructions here, not LDS or GDS.
|
||||||
|
// Whenever we store a value in vcc, the correct value of vccz is
|
||||||
|
// restored.
|
||||||
|
VCCZCorrupt = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we need to apply the bug work-around
|
||||||
|
if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
|
||||||
|
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
|
||||||
|
|
||||||
|
// Wait on everything, not just LGKM. vccz reads usually come from
|
||||||
|
// terminators, and we always wait on everything at the end of the
|
||||||
|
// block, so if we only wait on LGKM here, we might end up with
|
||||||
|
// another s_waitcnt inserted right after this if there are non-LGKM
|
||||||
|
// instructions still outstanding.
|
||||||
|
insertWait(MBB, I, LastIssued);
|
||||||
|
|
||||||
|
// Restore the vccz bit. Any time a value is written to vcc, the vccz
|
||||||
|
// bit is updated, so we can restore the bit by reading the value of
|
||||||
|
// vcc and then writing it back to the register.
|
||||||
|
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
|
||||||
|
AMDGPU::VCC)
|
||||||
|
.addReg(AMDGPU::VCC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Wait for everything before a barrier.
|
// Wait for everything before a barrier.
|
||||||
if (I->getOpcode() == AMDGPU::S_BARRIER)
|
if (I->getOpcode() == AMDGPU::S_BARRIER)
|
||||||
Changes |= insertWait(MBB, I, LastIssued);
|
Changes |= insertWait(MBB, I, LastIssued);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user