mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
3016803f3e
Support is disabled by default. So far it covers instruction selection, spilling, and frame elimination. It also changes SP from unswizzled to swizzled, as used by flat scratch instructions, so it cannot be mixed with MUBUF stack access.

Still missing, at the very least:
- GlobalISel;
- some optimizations in frame elimination between vector and scalar ALU;
- it should eventually allow the frame index to always be materialized as an SGPR, but that is not implemented and frame elimination cannot handle it yet;
- unaligned and/or multi-dword flat scratch accesses should work, but they are currently legalized for MUBUF;
- operand folding cannot yet optimize FI the way it does with MUBUF;
- the value of SP/FP in the DWARF expression will need scaling to recover the unswizzled scratch address.

Differential Revision: https://reviews.llvm.org/D89170
468 lines
18 KiB
YAML
468 lines
18 KiB
YAML
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,MUBUF %s
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN32,MUBUF %s
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,FLATSCR %s
|
|
|
|
|
|
# CHECK-LABEL: name: check_spill
|
|
|
|
# FLATSCR: $sgpr33 = S_MOV_B32 0
|
|
# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
|
|
# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
|
|
|
|
# S32 with kill
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# CHECK: $exec_lo = S_MOV_B32 1
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 4
|
|
# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
|
|
# S32 without kill
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# CHECK: $exec_lo = S_MOV_B32 1
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 4
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
|
|
# S64 with kill
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 3
|
|
# GCN64: $exec = S_MOV_B64 3
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 8
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S64 without kill
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 3
|
|
# GCN64: $exec = S_MOV_B64 3
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 8
|
|
# GCN32: $exec_lo = S_MOV_B32 $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13
|
|
# GCN64: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
|
|
# S96
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 7
|
|
# GCN64: $exec = S_MOV_B64 7
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 16
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S128
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 15
|
|
# GCN64: $exec = S_MOV_B64 15
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 28
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S160
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 31
|
|
# GCN64: $exec = S_MOV_B64 31
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 44
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S256
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 255
|
|
# GCN64: $exec = S_MOV_B64 255
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 64
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S512
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 65535
|
|
# GCN64: $exec = S_MOV_B64 65535
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 96
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
|
|
# S1024
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# CHECK: V_WRITELANE
|
|
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 4294967295
|
|
# GCN64: $exec = S_MOV_B64 4294967295
|
|
# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
|
# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 160
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
|
|
|
|
--- |

  ; LLVM IR stubs for the two machine functions defined in the MIR documents
  ; of this file. Each kernel body is only `ret void`; both share attribute
  ; group #0.
  define amdgpu_kernel void @check_spill() #0 {
    ret void
  }

  define amdgpu_kernel void @check_reload() #0 {
    ret void
  }

  attributes #0 = { "frame-pointer"="all" }
...
|
|
---
# Machine function that issues one SI_SPILL_S*_SAVE pseudo per SGPR tuple
# width (32 to 1024 bits). The 32-bit and 64-bit cases appear twice: once
# with the source operand marked `killed` and once without.
name:            check_spill
tracksRegLiveness: true
liveins:
  - { reg: '$sgpr4_sgpr5' }
  - { reg: '$sgpr6_sgpr7' }
  - { reg: '$sgpr8' }
frameInfo:
  maxAlignment:    4
stack:
  # One spill slot per spilled width; sizes are in bytes and match the
  # S32/S64/S96/S128/S160/S256/S512/S1024 pseudos that reference them below.
  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
  - { id: 1, type: spill-slot, size: 8, alignment: 4 }
  - { id: 2, type: spill-slot, size: 12, alignment: 4 }
  - { id: 3, type: spill-slot, size: 16, alignment: 4 }
  - { id: 4, type: spill-slot, size: 20, alignment: 4 }
  - { id: 5, type: spill-slot, size: 32, alignment: 4 }
  - { id: 6, type: spill-slot, size: 64, alignment: 4 }
  - { id: 7, type: spill-slot, size: 128, alignment: 4 }
machineFunctionInfo:
  explicitKernArgSize: 660
  maxKernArgAlign: 4
  isEntryFunction: true
  waveLimiter:     true
  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg:  '$sgpr33'
  argumentInfo:
    flatScratchInit: { reg: '$sgpr0_sgpr1' }
    dispatchPtr:     { reg: '$sgpr2_sgpr3' }
    privateSegmentBuffer: { reg: '$sgpr4_sgpr5_sgpr6_sgpr7' }
    kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
    workGroupIDX:    { reg: '$sgpr10' }
    privateSegmentWaveByteOffset: { reg: '$sgpr11' }
body:             |
  bb.0:
    liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7

    ; 32-bit spill, source killed.
    renamable $sgpr12 = IMPLICIT_DEF
    SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 32-bit spill, source not killed.
    renamable $sgpr12 = IMPLICIT_DEF
    SI_SPILL_S32_SAVE $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 64-bit spill, source killed.
    renamable $sgpr12_sgpr13 = IMPLICIT_DEF
    SI_SPILL_S64_SAVE killed $sgpr12_sgpr13, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 64-bit spill, source not killed.
    renamable $sgpr12_sgpr13 = IMPLICIT_DEF
    SI_SPILL_S64_SAVE $sgpr12_sgpr13, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 96-bit spill.
    renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
    SI_SPILL_S96_SAVE killed $sgpr12_sgpr13_sgpr14, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 128-bit spill.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
    SI_SPILL_S128_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 160-bit spill.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
    SI_SPILL_S160_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 256-bit spill.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
    SI_SPILL_S256_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 512-bit spill.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
    SI_SPILL_S512_SAVE killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 1024-bit spill; uses the sgpr64..sgpr95 tuple instead of one starting at sgpr12.
    renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
    SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
|
|
|
|
|
|
# CHECK-LABEL: name: check_reload
|
|
|
|
# FLATSCR: $sgpr33 = S_MOV_B32 0
|
|
# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
|
|
# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
|
|
|
|
# S32
|
|
# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# CHECK: $exec_lo = S_MOV_B32 1
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4
|
|
# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
|
|
# S64
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 3
|
|
# GCN64: $exec = S_MOV_B64 3
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
|
|
# S96
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 7
|
|
# GCN64: $exec = S_MOV_B64 7
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr14 = V_READLANE
|
|
|
|
# S128
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 15
|
|
# GCN64: $exec = S_MOV_B64 15
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr14 = V_READLANE
|
|
# CHECK: $sgpr15 = V_READLANE
|
|
|
|
# S160
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 31
|
|
# GCN64: $exec = S_MOV_B64 31
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr14 = V_READLANE
|
|
# CHECK: $sgpr15 = V_READLANE
|
|
# CHECK: $sgpr16 = V_READLANE
|
|
|
|
# S256
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 255
|
|
# GCN64: $exec = S_MOV_B64 255
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr14 = V_READLANE
|
|
# CHECK: $sgpr15 = V_READLANE
|
|
# CHECK: $sgpr16 = V_READLANE
|
|
# CHECK: $sgpr17 = V_READLANE
|
|
# CHECK: $sgpr18 = V_READLANE
|
|
# CHECK: $sgpr19 = V_READLANE
|
|
|
|
# S512
|
|
# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 65535
|
|
# GCN64: $exec = S_MOV_B64 65535
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
|
|
# CHECK: $sgpr12 = V_READLANE
|
|
# CHECK: $sgpr13 = V_READLANE
|
|
# CHECK: $sgpr14 = V_READLANE
|
|
# CHECK: $sgpr15 = V_READLANE
|
|
# CHECK: $sgpr16 = V_READLANE
|
|
# CHECK: $sgpr17 = V_READLANE
|
|
# CHECK: $sgpr18 = V_READLANE
|
|
# CHECK: $sgpr19 = V_READLANE
|
|
# CHECK: $sgpr20 = V_READLANE
|
|
# CHECK: $sgpr21 = V_READLANE
|
|
# CHECK: $sgpr22 = V_READLANE
|
|
# CHECK: $sgpr23 = V_READLANE
|
|
# CHECK: $sgpr24 = V_READLANE
|
|
# CHECK: $sgpr25 = V_READLANE
|
|
# CHECK: $sgpr26 = V_READLANE
|
|
# CHECK: $sgpr27 = V_READLANE
|
|
|
|
# S1024
|
|
# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
|
|
# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
|
|
# GCN32: $exec_lo = S_MOV_B32 4294967295
|
|
# GCN64: $exec = S_MOV_B64 4294967295
|
|
# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
|
|
# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160
|
|
# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
|
|
# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
|
|
# CHECK: $sgpr64 = V_READLANE
|
|
# CHECK: $sgpr65 = V_READLANE
|
|
# CHECK: $sgpr66 = V_READLANE
|
|
# CHECK: $sgpr67 = V_READLANE
|
|
# CHECK: $sgpr68 = V_READLANE
|
|
# CHECK: $sgpr69 = V_READLANE
|
|
# CHECK: $sgpr70 = V_READLANE
|
|
# CHECK: $sgpr71 = V_READLANE
|
|
# CHECK: $sgpr72 = V_READLANE
|
|
# CHECK: $sgpr73 = V_READLANE
|
|
# CHECK: $sgpr74 = V_READLANE
|
|
# CHECK: $sgpr75 = V_READLANE
|
|
# CHECK: $sgpr76 = V_READLANE
|
|
# CHECK: $sgpr77 = V_READLANE
|
|
# CHECK: $sgpr78 = V_READLANE
|
|
# CHECK: $sgpr79 = V_READLANE
|
|
# CHECK: $sgpr80 = V_READLANE
|
|
# CHECK: $sgpr81 = V_READLANE
|
|
# CHECK: $sgpr82 = V_READLANE
|
|
# CHECK: $sgpr83 = V_READLANE
|
|
# CHECK: $sgpr84 = V_READLANE
|
|
# CHECK: $sgpr85 = V_READLANE
|
|
# CHECK: $sgpr86 = V_READLANE
|
|
# CHECK: $sgpr87 = V_READLANE
|
|
# CHECK: $sgpr88 = V_READLANE
|
|
# CHECK: $sgpr89 = V_READLANE
|
|
# CHECK: $sgpr90 = V_READLANE
|
|
# CHECK: $sgpr91 = V_READLANE
|
|
# CHECK: $sgpr92 = V_READLANE
|
|
# CHECK: $sgpr93 = V_READLANE
|
|
# CHECK: $sgpr94 = V_READLANE
|
|
# CHECK: $sgpr95 = V_READLANE
|
|
|
|
---
# Machine function that issues one SI_SPILL_S*_RESTORE pseudo per SGPR tuple
# width (32 to 1024 bits), each reading from its own spill slot.
name:            check_reload
tracksRegLiveness: true
liveins:
  - { reg: '$sgpr4_sgpr5' }
  - { reg: '$sgpr6_sgpr7' }
  - { reg: '$sgpr8' }
frameInfo:
  maxAlignment:    4
stack:
  # One spill slot per reloaded width; sizes are in bytes and match the
  # S32/S64/S96/S128/S160/S256/S512/S1024 pseudos that reference them below.
  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
  - { id: 1, type: spill-slot, size: 8, alignment: 4 }
  - { id: 2, type: spill-slot, size: 12, alignment: 4 }
  - { id: 3, type: spill-slot, size: 16, alignment: 4 }
  - { id: 4, type: spill-slot, size: 20, alignment: 4 }
  - { id: 5, type: spill-slot, size: 32, alignment: 4 }
  - { id: 6, type: spill-slot, size: 64, alignment: 4 }
  - { id: 7, type: spill-slot, size: 128, alignment: 4 }
machineFunctionInfo:
  explicitKernArgSize: 660
  maxKernArgAlign: 4
  isEntryFunction: true
  waveLimiter:     true
  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg:  '$sgpr33'
  argumentInfo:
    flatScratchInit: { reg: '$sgpr0_sgpr1' }
    dispatchPtr:     { reg: '$sgpr2_sgpr3' }
    privateSegmentBuffer: { reg: '$sgpr4_sgpr5_sgpr6_sgpr7' }
    kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
    workGroupIDX:    { reg: '$sgpr10' }
    privateSegmentWaveByteOffset: { reg: '$sgpr11' }
body:             |
  bb.0:
    liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7

    ; 32-bit reload.
    renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 64-bit reload.
    renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 96-bit reload.
    renamable $sgpr12_sgpr13_sgpr14 = SI_SPILL_S96_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 128-bit reload.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 160-bit reload.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = SI_SPILL_S160_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 256-bit reload.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S256_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 512-bit reload.
    renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

    ; 1024-bit reload; targets the sgpr64..sgpr95 tuple instead of one starting at sgpr12.
    renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
|