1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00
llvm-mirror/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
Mark Searles 87f5640efb [AMDGPU][Waitcnt] Fix handling of loops with many bottom blocks
When inserting waitcnts, the waitcnt pass forces convergence for a loop if
necessary. Previously, that kicked in after more than 2 passes over a loop,
which doesn't account for loops with many bottom blocks. So, increase the
threshold to (n+1), where n is the number of bottom blocks. This gives the pass
an opportunity to consider the contribution of each bottom block to the overall
loop before the forced convergence potentially kicks in.

Differential Revision: https://reviews.llvm.org/D47488

llvm-svn: 333556
2018-05-30 15:47:45 +00:00

94 lines
3.0 KiB
YAML

# RUN: llc -o - %s -march=amdgcn -mcpu=fiji -run-pass=si-insert-waitcnts -verify-machineinstrs | FileCheck -check-prefix=GCN %s

# Regression test for the si-insert-waitcnts pass (D47488 / r333556): a loop
# whose header (bb.1) is entered from several bottom blocks (bb.3 and bb.4)
# must still account for the FLAT_LOAD_DWORD of $vgpr5 performed in those
# bottom blocks, so an S_WAITCNT is required before $vgpr5 is consumed by
# V_CVT_I32_F32_e32.
# NOTE(review): the expected operand (112) is the target-specific waitcnt
# encoding emitted by the pass here — confirm against current
# SIInsertWaitcnts output if this check ever needs updating.
# GCN-LABEL: waitcnt-back-edge-loop
# GCN: bb.2
# GCN: S_WAITCNT 112
# GCN: $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec
---
name: waitcnt-back-edge-loop
body: |
bb.0:
successors: %bb.1
$vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
$vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
$vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
$vgpr3 = V_CNDMASK_B32_e64 -1082130432, 1065353216, killed $sgpr0_sgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
S_BRANCH %bb.1
bb.3:
successors: %bb.1
$vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
bb.1:
successors: %bb.5, %bb.2
$vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec
V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
S_CBRANCH_VCCZ %bb.5, implicit killed $vcc
bb.2:
successors: %bb.4, %bb.3
V_CMP_EQ_U32_e32 9, killed $vgpr5, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
S_CBRANCH_VCCZ %bb.3, implicit killed $vcc
bb.4:
successors: %bb.3, %bb.1
$vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec
V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
$vgpr4 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $exec
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
S_BRANCH %bb.3
bb.5:
$vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $exec
EXP_DONE 12, killed $vgpr4, undef $vgpr0, undef $vgpr0, undef $vgpr0, 0, 0, 15, implicit $exec
S_ENDPGM
...
---
# Second function: the loop header bb.2 has multiple back edges — from bb.1
# (S_BRANCH %bb.2) and from bb.4 and bb.5 (S_CBRANCH_SCC0 %bb.2) — plus an
# outer edge from bb.6 back to bb.0. The convergence threshold must allow
# one pass per bottom block so the BUFFER_ATOMIC_ADD_OFFSET in bb.4 is
# accounted for; the checks pin the S_WAITCNT 0 emitted in bb.0 before the
# branch into the loop.
# GCN-LABEL: name: waitcnt-multiple-back-edges{{$}}
# GCN: bb.0:
# GCN: S_WAITCNT 0
# GCN-NEXT: S_BRANCH %bb.2
name: waitcnt-multiple-back-edges
body: |
bb.0:
S_BRANCH %bb.2
bb.1:
S_BRANCH %bb.2
bb.2:
S_CBRANCH_VCCZ %bb.1, implicit $vcc
bb.3:
S_CBRANCH_VCCNZ %bb.5, implicit $vcc
bb.4:
BUFFER_ATOMIC_ADD_OFFSET renamable $vgpr0, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 4, 0, implicit $exec
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.6
bb.5:
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.6
bb.6:
S_CBRANCH_SCC1 %bb.0, implicit $scc
S_ENDPGM
...