mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
13c67e5bdd
Summary:
Up to gfx9, writes to vcc_lo and vcc_hi by instructions like v_readlane and v_readfirstlane do not update vccz to reflect the new value of vcc. Fix it by reusing part of the existing vccz bug handling code, which inserts an "s_mov_b64 vcc, vcc" instruction to restore vccz just before an instruction that needs the correct value.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69661
166 lines
5.5 KiB
YAML
166 lines
5.5 KiB
YAML
# Run the si-insert-waitcnts pass once per target listed below. Every run
# shares the CHECK prefix and adds one target-specific prefix (SI, GFX9 or
# GFX10). The gfx1010 run selects wavefrontsize64 through -mattr.
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
---
# Branch on vccz directly after a V_CMP writes vcc, with an
# S_LOAD_DWORDX2_IMM issued earlier in the same block.
# For the SI run only, the pass is expected to place S_WAITCNT 127 and then
# `$vcc = S_MOV_B64 $vcc` between the compare and the branch; the other runs
# expect the branch on the line right after the compare.
# NOTE(review): the input branches to %bb.1 while the expected output names
# %bb.2. The body lists bb.2 before bb.1, so the printed MIR presumably
# renumbers blocks by position -- confirm before editing block labels.

# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: $vcc = V_CMP_EQ_F32
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc

name: vccz_corrupt_workaround
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Variant of the vccz branch test with no compare: the branch in bb.0 reads
# vcc as `implicit undef $vcc`.
# The checks only pin what follows the first BUFFER_STORE_DWORD_OFFSET: the SI
# run expects S_WAITCNT 3855 on the next line, then the V_MOV_B32_e32 that
# redefines $vgpr0; the other runs expect that V_MOV_B32_e32 directly after
# the store. Nothing here checks the branch itself.

# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: BUFFER_STORE_DWORD_OFFSET
# SI-NEXT: S_WAITCNT 3855
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32

name: vccz_corrupt_undef_vcc
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Test that after reloading vcc spilled to a vgpr, we insert any necessary
# instructions to fix vccz.
# vcc is rebuilt in two halves, $vcc_lo and $vcc_hi, by V_READLANE_B32_vi from
# lanes 8 and 9 of $vgpr0. The SI and GFX9 runs expect `$vcc = S_MOV_B64 $vcc`
# on the line before the branch; the GFX10 run expects the branch directly
# after the $vcc_hi reload.

# CHECK-LABEL: name: reload_vcc_from_vgpr
# CHECK: $vcc_lo = V_READLANE_B32_vi $vgpr0, 8, implicit-def $vcc
# CHECK: $vcc_hi = V_READLANE_B32_vi $vgpr0, 9
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_vgpr
body: |
  bb.0:
    $vcc_lo = V_READLANE_B32_vi $vgpr0, 8, implicit-def $vcc
    $vcc_hi = V_READLANE_B32_vi $vgpr0, 9
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after reloading vcc spilled to memory, we insert any necessary
# instructions to fix vccz.
# Each half of vcc is loaded into $vgpr0 with BUFFER_LOAD_DWORD_OFFSET and then
# moved into $vcc_lo / $vcc_hi with V_READFIRSTLANE_B32. The SI and GFX9 runs
# expect `$vcc = S_MOV_B64 $vcc` on the line before the branch; the GFX10 run
# expects the branch directly after the $vcc_hi reload.

# CHECK-LABEL: name: reload_vcc_from_mem
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_mem
body: |
  bb.0:
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
    $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
    $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc_lo, we insert any necessary
# instructions to fix vccz.
# The inline asm declares only `implicit-def $vcc_lo`. The SI and GFX9 runs
# expect `$vcc = S_MOV_B64 $vcc` on the line before the branch; the GFX10 run
# expects the branch directly after the inline asm.

# CHECK-LABEL: name: inlineasm_def_vcc_lo
# CHECK: INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc_lo
body: |
  bb.0:
    INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc, no unnecessary instructions are
# inserted to fix vccz.
# Here the inline asm declares `implicit-def $vcc` (the whole register), and
# every run expects the branch on the line right after the inline asm.

# CHECK-LABEL: name: inlineasm_def_vcc
# CHECK: INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc
body: |
  bb.0:
    INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...