1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/sgpr-spill.mir
Stanislav Mekhanoshin 3016803f3e [AMDGPU] Use flat scratch instructions where available
The support is disabled by default. So far there is instruction
selection, spilling, and frame elimination. It also changes SP
from unswizzled to swizzled as used by flat scratch instructions,
so it cannot be mixed with MUBUF stack access.

At the very least missing:

- GlobalISel;
- Some optimizations in frame elimination in between vector
  and scalar ALU;
- It shall finally allow to always materialize frame index
  as an SGPR, but that is not implemented and frame elimination
  cannot handle it yet;
- Unaligned and/or multidword flat scratch shall work, but it
  is legalized now for MUBUF;
- Operand folding cannot optimize FI like with MUBUF yet;
- It will need scaling the value of the SP/FP in the DWARF
  expression to recover the unswizzled scratch address;

Differential Revision: https://reviews.llvm.org/D89170
2020-10-26 14:40:42 -07:00

468 lines
18 KiB
YAML

# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,MUBUF %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN32,MUBUF %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,FLATSCR %s
# CHECK-LABEL: name: check_spill
# FLATSCR: $sgpr33 = S_MOV_B32 0
# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
# S32 with kill
# CHECK: V_WRITELANE
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
# CHECK: $exec_lo = S_MOV_B32 1
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 4
# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
# S32 without kill
# CHECK: V_WRITELANE
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
# CHECK: $exec_lo = S_MOV_B32 1
# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 4
# CHECK: $sgpr12 = V_READLANE
# S64 with kill
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 3
# GCN64: $exec = S_MOV_B64 3
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 8
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S64 without kill
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 3
# GCN64: $exec = S_MOV_B64 3
# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 8
# GCN32: $exec_lo = S_MOV_B32 $sgpr12
# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13
# GCN64: $sgpr13 = V_READLANE
# CHECK: $sgpr12 = V_READLANE
# S96
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 7
# GCN64: $exec = S_MOV_B64 7
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 16
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S128
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 15
# GCN64: $exec = S_MOV_B64 15
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 28
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S160
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 31
# GCN64: $exec = S_MOV_B64 31
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 44
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S256
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 255
# GCN64: $exec = S_MOV_B64 255
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 64
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S512
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 65535
# GCN64: $exec = S_MOV_B64 65535
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 96
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# S1024
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# CHECK: V_WRITELANE
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 4294967295
# GCN64: $exec = S_MOV_B64 4294967295
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 160
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
--- |
define amdgpu_kernel void @check_spill() #0 {
ret void
}
define amdgpu_kernel void @check_reload() #0 {
ret void
}
attributes #0 = { "frame-pointer"="all" }
...
---
name: check_spill
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr8' }
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
- { id: 1, type: spill-slot, size: 8, alignment: 4 }
- { id: 2, type: spill-slot, size: 12, alignment: 4 }
- { id: 3, type: spill-slot, size: 16, alignment: 4 }
- { id: 4, type: spill-slot, size: 20, alignment: 4 }
- { id: 5, type: spill-slot, size: 32, alignment: 4 }
- { id: 6, type: spill-slot, size: 64, alignment: 4 }
- { id: 7, type: spill-slot, size: 128, alignment: 4 }
machineFunctionInfo:
explicitKernArgSize: 660
maxKernArgAlign: 4
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
argumentInfo:
flatScratchInit: { reg: '$sgpr0_sgpr1' }
dispatchPtr: { reg: '$sgpr2_sgpr3' }
privateSegmentBuffer: { reg: '$sgpr4_sgpr5_sgpr6_sgpr7' }
kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
workGroupIDX: { reg: '$sgpr10' }
privateSegmentWaveByteOffset: { reg: '$sgpr11' }
body: |
bb.0:
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
renamable $sgpr12 = IMPLICIT_DEF
SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12 = IMPLICIT_DEF
SI_SPILL_S32_SAVE $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13 = IMPLICIT_DEF
SI_SPILL_S64_SAVE killed $sgpr12_sgpr13, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13 = IMPLICIT_DEF
SI_SPILL_S64_SAVE $sgpr12_sgpr13, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
SI_SPILL_S96_SAVE killed $sgpr12_sgpr13_sgpr14, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
SI_SPILL_S128_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
SI_SPILL_S160_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
SI_SPILL_S256_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
SI_SPILL_S512_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
# CHECK-LABEL: name: check_reload
# FLATSCR: $sgpr33 = S_MOV_B32 0
# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
# S32
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
# CHECK: $exec_lo = S_MOV_B32 1
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4
# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
# CHECK: $sgpr12 = V_READLANE
# S64
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 3
# GCN64: $exec = S_MOV_B64 3
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# S96
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 7
# GCN64: $exec = S_MOV_B64 7
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# CHECK: $sgpr14 = V_READLANE
# S128
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 15
# GCN64: $exec = S_MOV_B64 15
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# CHECK: $sgpr14 = V_READLANE
# CHECK: $sgpr15 = V_READLANE
# S160
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 31
# GCN64: $exec = S_MOV_B64 31
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# CHECK: $sgpr14 = V_READLANE
# CHECK: $sgpr15 = V_READLANE
# CHECK: $sgpr16 = V_READLANE
# S256
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 255
# GCN64: $exec = S_MOV_B64 255
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# CHECK: $sgpr14 = V_READLANE
# CHECK: $sgpr15 = V_READLANE
# CHECK: $sgpr16 = V_READLANE
# CHECK: $sgpr17 = V_READLANE
# CHECK: $sgpr18 = V_READLANE
# CHECK: $sgpr19 = V_READLANE
# S512
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 65535
# GCN64: $exec = S_MOV_B64 65535
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
# CHECK: $sgpr12 = V_READLANE
# CHECK: $sgpr13 = V_READLANE
# CHECK: $sgpr14 = V_READLANE
# CHECK: $sgpr15 = V_READLANE
# CHECK: $sgpr16 = V_READLANE
# CHECK: $sgpr17 = V_READLANE
# CHECK: $sgpr18 = V_READLANE
# CHECK: $sgpr19 = V_READLANE
# CHECK: $sgpr20 = V_READLANE
# CHECK: $sgpr21 = V_READLANE
# CHECK: $sgpr22 = V_READLANE
# CHECK: $sgpr23 = V_READLANE
# CHECK: $sgpr24 = V_READLANE
# CHECK: $sgpr25 = V_READLANE
# CHECK: $sgpr26 = V_READLANE
# CHECK: $sgpr27 = V_READLANE
# S1024
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
# GCN32: $exec_lo = S_MOV_B32 4294967295
# GCN64: $exec = S_MOV_B64 4294967295
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
# CHECK: $sgpr64 = V_READLANE
# CHECK: $sgpr65 = V_READLANE
# CHECK: $sgpr66 = V_READLANE
# CHECK: $sgpr67 = V_READLANE
# CHECK: $sgpr68 = V_READLANE
# CHECK: $sgpr69 = V_READLANE
# CHECK: $sgpr70 = V_READLANE
# CHECK: $sgpr71 = V_READLANE
# CHECK: $sgpr72 = V_READLANE
# CHECK: $sgpr73 = V_READLANE
# CHECK: $sgpr74 = V_READLANE
# CHECK: $sgpr75 = V_READLANE
# CHECK: $sgpr76 = V_READLANE
# CHECK: $sgpr77 = V_READLANE
# CHECK: $sgpr78 = V_READLANE
# CHECK: $sgpr79 = V_READLANE
# CHECK: $sgpr80 = V_READLANE
# CHECK: $sgpr81 = V_READLANE
# CHECK: $sgpr82 = V_READLANE
# CHECK: $sgpr83 = V_READLANE
# CHECK: $sgpr84 = V_READLANE
# CHECK: $sgpr85 = V_READLANE
# CHECK: $sgpr86 = V_READLANE
# CHECK: $sgpr87 = V_READLANE
# CHECK: $sgpr88 = V_READLANE
# CHECK: $sgpr89 = V_READLANE
# CHECK: $sgpr90 = V_READLANE
# CHECK: $sgpr91 = V_READLANE
# CHECK: $sgpr92 = V_READLANE
# CHECK: $sgpr93 = V_READLANE
# CHECK: $sgpr94 = V_READLANE
# CHECK: $sgpr95 = V_READLANE
---
name: check_reload
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr8' }
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
- { id: 1, type: spill-slot, size: 8, alignment: 4 }
- { id: 2, type: spill-slot, size: 12, alignment: 4 }
- { id: 3, type: spill-slot, size: 16, alignment: 4 }
- { id: 4, type: spill-slot, size: 20, alignment: 4 }
- { id: 5, type: spill-slot, size: 32, alignment: 4 }
- { id: 6, type: spill-slot, size: 64, alignment: 4 }
- { id: 7, type: spill-slot, size: 128, alignment: 4 }
machineFunctionInfo:
explicitKernArgSize: 660
maxKernArgAlign: 4
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
argumentInfo:
flatScratchInit: { reg: '$sgpr0_sgpr1' }
dispatchPtr: { reg: '$sgpr2_sgpr3' }
privateSegmentBuffer: { reg: '$sgpr4_sgpr5_sgpr6_sgpr7' }
kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
workGroupIDX: { reg: '$sgpr10' }
privateSegmentWaveByteOffset: { reg: '$sgpr11' }
body: |
bb.0:
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14 = SI_SPILL_S96_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = SI_SPILL_S160_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S256_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32