mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AMDGPU] Move insertion of function entry waitcnt earlier
This allows the function entry waits to be tracked as preexisting waitcnt.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D101380
This commit is contained in:
parent
a223875bcf
commit
f82032cf1f
@ -1600,6 +1600,28 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
|
|
||||||
TrackedWaitcntSet.clear();
|
TrackedWaitcntSet.clear();
|
||||||
BlockInfos.clear();
|
BlockInfos.clear();
|
||||||
|
bool Modified = false;
|
||||||
|
|
||||||
|
if (!MFI->isEntryFunction()) {
|
||||||
|
// Wait for any outstanding memory operations that the input registers may
|
||||||
|
// depend on. We can't track them and it's better to do the wait after the
|
||||||
|
// costly call sequence.
|
||||||
|
|
||||||
|
// TODO: Could insert earlier and schedule more liberally with operations
|
||||||
|
// that only use caller preserved registers.
|
||||||
|
MachineBasicBlock &EntryBB = MF.front();
|
||||||
|
MachineBasicBlock::iterator I = EntryBB.begin();
|
||||||
|
for (MachineBasicBlock::iterator E = EntryBB.end();
|
||||||
|
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
|
||||||
|
;
|
||||||
|
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
|
||||||
|
if (ST->hasVscnt())
|
||||||
|
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
|
||||||
|
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
|
||||||
|
.addImm(0);
|
||||||
|
|
||||||
|
Modified = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Keep iterating over the blocks in reverse post order, inserting and
|
// Keep iterating over the blocks in reverse post order, inserting and
|
||||||
// updating s_waitcnt where needed, until a fix point is reached.
|
// updating s_waitcnt where needed, until a fix point is reached.
|
||||||
@ -1607,7 +1629,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
BlockInfos.insert({MBB, BlockInfo(MBB)});
|
BlockInfos.insert({MBB, BlockInfo(MBB)});
|
||||||
|
|
||||||
std::unique_ptr<WaitcntBrackets> Brackets;
|
std::unique_ptr<WaitcntBrackets> Brackets;
|
||||||
bool Modified = false;
|
|
||||||
bool Repeat;
|
bool Repeat;
|
||||||
do {
|
do {
|
||||||
Repeat = false;
|
Repeat = false;
|
||||||
@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!MFI->isEntryFunction()) {
|
|
||||||
// Wait for any outstanding memory operations that the input registers may
|
|
||||||
// depend on. We can't track them and it's better to the wait after the
|
|
||||||
// costly call sequence.
|
|
||||||
|
|
||||||
// TODO: Could insert earlier and schedule more liberally with operations
|
|
||||||
// that only use caller preserved registers.
|
|
||||||
MachineBasicBlock &EntryBB = MF.front();
|
|
||||||
MachineBasicBlock::iterator I = EntryBB.begin();
|
|
||||||
for (MachineBasicBlock::iterator E = EntryBB.end();
|
|
||||||
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
|
|
||||||
;
|
|
||||||
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
|
|
||||||
if (ST->hasVscnt())
|
|
||||||
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
|
|
||||||
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
|
|
||||||
.addImm(0);
|
|
||||||
|
|
||||||
Modified = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Modified;
|
return Modified;
|
||||||
}
|
}
|
||||||
|
@ -192,4 +192,17 @@ body: |
|
|||||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||||
S_WAITCNT 3952
|
S_WAITCNT 3952
|
||||||
KILL $vgpr0
|
KILL $vgpr0
|
||||||
|
|
||||||
|
# Combine preexisting waitcnt with wait added to the start of a non-entry function.
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_waitcnt_preexisting_func_start
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
|
||||||
|
; GFX9: S_WAITCNT 0
|
||||||
|
; GFX9-NOT: S_WAITCNT 0
|
||||||
|
; GFX9: S_ENDPGM 0
|
||||||
|
S_WAITCNT 0
|
||||||
|
S_ENDPGM 0
|
||||||
...
|
...
|
||||||
|
Loading…
Reference in New Issue
Block a user