mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
87f5640efb
During waitcnt insertion, the waitcnt pass forces convergence for a loop when necessary. Previously, that kicked in after more than 2 passes over a loop, which doesn't account for loops with many bottom blocks. So, increase the threshold to (n+1), where n is the number of bottom blocks. This gives the pass an opportunity to consider the contribution of each bottom block to the overall loop before the forced convergence potentially kicks in. Differential Revision: https://reviews.llvm.org/D47488 llvm-svn: 333556
94 lines
3.0 KiB
YAML
94 lines
3.0 KiB
YAML
# RUN: llc -o - %s -march=amdgcn -mcpu=fiji -run-pass=si-insert-waitcnts -verify-machineinstrs | FileCheck -check-prefix=GCN %s

# Loop test: the block written as bb.1 below reads $vgpr5, and $vgpr5 is
# written by FLAT_LOAD_DWORD in bb.3 and bb.4, both of which lead back into
# bb.1. The checks expect an S_WAITCNT 112 ahead of the V_CVT_I32_F32_e32
# that consumes (kills) $vgpr5.
# NOTE(review): the checked "bb.2" presumably refers to the block written as
# bb.1 below (third block in file order, and the only one holding the checked
# V_CVT that kills $vgpr5) -- confirm against the block numbering llc prints.
# GCN-LABEL: waitcnt-back-edge-loop
# GCN: bb.2
# GCN: S_WAITCNT 112
# GCN: $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec

---
name: waitcnt-back-edge-loop
body: |
  ; Entry: issues two FLAT loads (into $vgpr4 and $vgpr0), copies $vgpr0 into
  ; $vgpr5, then branches to bb.1.
  bb.0:
    successors: %bb.1

    $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
    $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
    $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
    $vgpr3 = V_CNDMASK_B32_e64 -1082130432, 1065353216, killed $sgpr0_sgpr1, implicit $exec
    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
    S_BRANCH %bb.1

  ; Reloads $vgpr5 with a FLAT load; has no terminator, so control continues
  ; into bb.1 (its only listed successor).
  bb.3:
    successors: %bb.1

    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)

  ; Consumes $vgpr5 (reached from bb.0, bb.3 and bb.4); branches to the exit
  ; block bb.5 on VCCZ, otherwise continues to bb.2.
  bb.1:
    successors: %bb.5, %bb.2

    $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec
    V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCZ %bb.5, implicit killed $vcc

  ; Compares $vgpr5 against 9; branches to bb.3 on VCCZ, otherwise continues
  ; to bb.4.
  bb.2:
    successors: %bb.4, %bb.3

    V_CMP_EQ_U32_e32 9, killed $vgpr5, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCZ %bb.3, implicit killed $vcc

  ; Reloads $vgpr5, then either branches straight back to bb.1 (VCCZ) or goes
  ; to bb.3, which in turn leads to bb.1.
  bb.4:
    successors: %bb.3, %bb.1

    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
    $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec
    V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    $vgpr4 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.3

  ; Exit block: uses $vgpr0, $vgpr3 and $vgpr4, exports the result and ends
  ; the program.
  bb.5:

    $vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $exec
    EXP_DONE 12, killed $vgpr4, undef $vgpr0, undef $vgpr0, undef $vgpr0, 0, 0, 15, implicit $exec
    S_ENDPGM
...
---

# Control-flow-only test with several backward branches: bb.1, bb.4 and bb.5
# all branch to bb.2, and bb.6 branches to bb.0. The checks expect, after the
# bb.0 label in the output, an S_WAITCNT 0 immediately followed by
# S_BRANCH %bb.2.
# GCN-LABEL: name: waitcnt-multiple-back-edges{{$}}
# GCN: bb.0:
# GCN: S_WAITCNT 0
# GCN-NEXT: S_BRANCH %bb.2

name: waitcnt-multiple-back-edges
body: |
  ; Entry; also the target of the branch at the end of bb.6.
  bb.0:
    S_BRANCH %bb.2

  ; Reached from bb.2 on VCCZ; branches straight back to bb.2.
  bb.1:
    S_BRANCH %bb.2

  ; Target of the branches in bb.0, bb.1, bb.4 and bb.5.
  bb.2:
    S_CBRANCH_VCCZ %bb.1, implicit $vcc

  bb.3:
    S_CBRANCH_VCCNZ %bb.5, implicit $vcc

  ; The only block in this function containing a memory instruction.
  bb.4:
    BUFFER_ATOMIC_ADD_OFFSET renamable $vgpr0, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 4, 0, implicit $exec
    S_CBRANCH_SCC0 %bb.2, implicit $scc
    S_BRANCH %bb.6

  bb.5:
    S_CBRANCH_SCC0 %bb.2, implicit $scc
    S_BRANCH %bb.6

  ; Branches back to bb.0 on SCC1, otherwise ends the program.
  bb.6:
    S_CBRANCH_SCC1 %bb.0, implicit $scc
    S_ENDPGM
...