
[AMDGPU] Always create Stack Object for reserved VGPR

Because we may overwrite the inactive lanes of a caller-saved VGPR, we should
always save/restore the reserved VGPR used for SGPR spilling.

Reviewed by: arsenm

Differential Revision: https://reviews.llvm.org/D98319
Ruiling Song 2021-03-10 11:04:54 +08:00
parent bff211afb1
commit d4ef89cda8
3 changed files with 50 additions and 26 deletions
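
In short: the spill slot for the reserved SGPR-spill VGPR is now created
unconditionally instead of only when the register is callee-saved. A minimal,
self-contained C++ sketch of that control-flow change (the FrameInfo type and
function names here are hypothetical stand-ins, not the real LLVM API; the
actual hunk follows below):

  // Stand-alone model of the change in lowerShiftReservedVGPR.
  // "FrameInfo" is a stand-in for MachineFrameInfo, not the real class.
  #include <optional>

  struct FrameInfo {
    int NumObjects = 0;
    // Stand-in for MachineFrameInfo::CreateSpillStackObject(4, Align(4)).
    int createSpillStackObject() { return NumObjects++; }
  };

  // Before: a stack slot was created only when the reserved VGPR was
  // callee-saved, so a non-CSR VGPR had nowhere to be saved.
  std::optional<int> reserveSlotBefore(FrameInfo &FI, bool IsCalleeSaved) {
    std::optional<int> Slot;
    if (IsCalleeSaved)
      Slot = FI.createSpillStackObject();
    return Slot;
  }

  // After: every reserved VGPR gets a slot, since a call can clobber the
  // inactive lanes of a non-callee-saved VGPR as well.
  std::optional<int> reserveSlotAfter(FrameInfo &FI, bool /*IsCalleeSaved*/) {
    return FI.createSpillStackObject();
  }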


@@ -262,13 +262,10 @@ static bool lowerShiftReservedVGPR(MachineFunction &MF,
   if (!LowestAvailableVGPR)
     LowestAvailableVGPR = PreReservedVGPR;
 
-  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-  Optional<int> FI;
-  // Check if we are reserving a CSR. Create a stack object for a possible spill
-  // in the function prologue.
-  if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
-    FI = FrameInfo.CreateSpillStackObject(4, Align(4));
+  // Create a stack object for a possible spill in the function prologue.
+  // Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
+  Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
 
   // Find saved info about the pre-reserved register.
   const auto *ReservedVGPRInfoItr =
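
The effect shows up in the updated tests below: each affected function now
saves the reserved VGPR in the prologue under a flipped exec mask
(s_or_saveexec_b64 followed by buffer_store_dword or scratch_store_dword) and
reloads it in the epilogue, and the extra stack object shifts the frame
offsets (e.g. 0x300 -> 0x400 for MUBUF, 12 -> 16 for flat scratch).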


@@ -269,6 +269,10 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
 ; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
 ; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
 ; GCN: s_waitcnt
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 v1, s33, 63
 ; GCN-COUNT-60: v_writelane_b32 v1
 ; GCN: s_mov_b32 s33, s32
@@ -280,11 +284,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
 ; GCN: ;;#ASMSTART
 ; GCN: v_writelane_b32 v1
-; MUBUF: s_add_u32 s32, s32, 0x300
-; MUBUF: s_sub_u32 s32, s32, 0x300
-; FLATSCR: s_add_u32 s32, s32, 12
-; FLATSCR: s_sub_u32 s32, s32, 12
+; MUBUF: s_add_u32 s32, s32, 0x400
+; MUBUF: s_sub_u32 s32, s32, 0x400
+; FLATSCR: s_add_u32 s32, s32, 16
+; FLATSCR: s_sub_u32 s32, s32, 16
 ; GCN-NEXT: v_readlane_b32 s33, v1, 63
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_setpc_b64
 define void @last_lane_vgpr_for_fp_csr() #1 {
@@ -306,6 +314,10 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
 ; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
 ; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
 ; GCN: s_waitcnt
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-COUNT-62: v_writelane_b32 v1,
 ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
 ; GCN-NEXT: s_mov_b32 s33, s32
@@ -318,12 +330,16 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
 ; GCN: v_writelane_b32 v1,
 ; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
 ; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
-; MUBUF: s_add_u32 s32, s32, 0x300
-; FLATSCR: s_add_u32 s32, s32, 12
+; MUBUF: s_add_u32 s32, s32, 0x400
+; FLATSCR: s_add_u32 s32, s32, 16
 ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
-; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
-; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
+; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
+; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_setpc_b64
 define void @no_new_vgpr_for_fp_csr() #1 {
@@ -368,6 +384,10 @@ define void @realign_stack_no_fp_elim() #1 {
 ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
 ; GCN: s_waitcnt
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 v1, s33, 2
 ; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
@@ -377,15 +397,20 @@ define void @realign_stack_no_fp_elim() #1 {
 ; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN: ;;#ASMSTART
-; MUBUF: v_readlane_b32 s4, v1, 0
-; MUBUF-NEXT: s_add_u32 s32, s32, 0x200
+; MUBUF: s_add_u32 s32, s32, 0x300
+; MUBUF-NEXT: v_readlane_b32 s4, v1, 0
 ; MUBUF-NEXT: v_readlane_b32 s5, v1, 1
-; FLATSCR: v_readlane_b32 s0, v1, 0
-; FLATSCR-NEXT: s_add_u32 s32, s32, 8
+; FLATSCR: s_add_u32 s32, s32, 12
+; FLATSCR-NEXT: v_readlane_b32 s0, v1, 0
 ; FLATSCR-NEXT: v_readlane_b32 s1, v1, 1
-; MUBUF-NEXT: s_sub_u32 s32, s32, 0x200
-; FLATSCR-NEXT: s_sub_u32 s32, s32, 8
+; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
+; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
 ; GCN-NEXT: v_readlane_b32 s33, v1, 2
+; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; MUBUF-NEXT: s_setpc_b64 s[4:5]
 ; FLATSCR-NEXT: s_setpc_b64 s[0:1]
 define void @no_unused_non_csr_sgpr_for_fp() #1 {
@@ -645,9 +670,11 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
 ; scratch VGPR to hold the offset.
 ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
 ; MUBUF: s_or_saveexec_b64 s[4:5], -1
+; MUBUF: v_mov_b32_e32 v0, 0x1008
+; MUBUF-NEXT: buffer_store_dword v39, v0, s[0:3], s32 offen ; 4-byte Folded Spill
 ; MUBUF: v_mov_b32_e32 v0, s33
-; GCN-NOT: v_mov_b32_e32 v0, 0x1008
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1008
+; GCN-NOT: v_mov_b32_e32 v0, 0x100c
+; MUBUF-NEXT: v_mov_b32_e32 v1, 0x100c
 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
 ; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004
 ; FLATSCR: v_mov_b32_e32 v0, 0


@@ -293,12 +293,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
 ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
 ; GCN: s_or_saveexec_b64 s[4:5], -1
 ; GCN: v_mov_b32_e32 v0, s33
-; GCN-NOT: v_mov_b32_e32 v0, 0x1084
-; GCN-NEXT: v_mov_b32_e32 v1, 0x1084
+; GCN-NOT: v_mov_b32_e32 v0, 0x1088
+; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
 ; GCN: v_mov_b32_e32 v0, s34
-; GCN-NOT: v_mov_b32_e32 v0, 0x1088
-; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
+; GCN-NOT: v_mov_b32_e32 v0, 0x108c
+; GCN-NEXT: v_mov_b32_e32 v1, 0x108c
 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
 %local_val = alloca i32, align 128, addrspace(5)
 store volatile i32 %b, i32 addrspace(5)* %local_val, align 128