1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

AMDGPU/SI: Insert wait states required after v_readfirstlane on SI

Summary:
We will be able to handle this case much better once the hazard recognizer
is finished, but this conservative implementation  fixes a hang with the piglit
test:

spec/arb_arrays_of_arrays/execution/sampler/fs-nested-struct-arrays-nonconst-nested-arra

Reviewers: arsenm, nhaehnle

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18988

llvm-svn: 266105
This commit is contained in:
Tom Stellard 2016-04-12 18:40:43 +00:00
parent 0740a1bead
commit ec17b58f39
3 changed files with 8 additions and 0 deletions

View File

@ -601,6 +601,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
insertDPPWaitStates(I);
}
// Insert required wait states for SMRD reading an SGPR written by a VALU
// instruction.
if (ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS &&
I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32)
TII->insertWaitStates(MBB, std::next(I), 4);
// Wait for everything before a barrier.
if (I->getOpcode() == AMDGPU::S_BARRIER)
Changes |= insertWait(MBB, I, LastIssued);

View File

@ -10,6 +10,7 @@
; SI: buffer_store_dword
; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
; SI-NEXT: s_nop
; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; SI: buffer_store_dword
; SI: s_endpgm

View File

@ -56,6 +56,7 @@ done: ; preds = %loop
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0
; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
; SI-NEXT: s_nop
; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]