mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AMDGPU] Always create Stack Object for reserved VGPR
Because we may overwrite inactive lanes of a caller-saved VGPR, we should always save/restore the VGPR reserved for SGPR spills. Reviewed by: arsenm. Differential Revision: https://reviews.llvm.org/D98319
This commit is contained in:
parent
bff211afb1
commit
d4ef89cda8
@ -262,13 +262,10 @@ static bool lowerShiftReservedVGPR(MachineFunction &MF,
|
||||
if (!LowestAvailableVGPR)
|
||||
LowestAvailableVGPR = PreReservedVGPR;
|
||||
|
||||
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
Optional<int> FI;
|
||||
// Check if we are reserving a CSR. Create a stack object for a possible spill
|
||||
// in the function prologue.
|
||||
if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
|
||||
FI = FrameInfo.CreateSpillStackObject(4, Align(4));
|
||||
// Create a stack object for a possible spill in the function prologue.
|
||||
// Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
|
||||
Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
|
||||
|
||||
// Find saved info about the pre-reserved register.
|
||||
const auto *ReservedVGPRInfoItr =
|
||||
|
@ -269,6 +269,10 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
|
||||
; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
|
||||
; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-NEXT: v_writelane_b32 v1, s33, 63
|
||||
; GCN-COUNT-60: v_writelane_b32 v1
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
@ -280,11 +284,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN: v_writelane_b32 v1
|
||||
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x300
|
||||
; FLATSCR: s_add_u32 s32, s32, 12
|
||||
; FLATSCR: s_sub_u32 s32, s32, 12
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; FLATSCR: s_sub_u32 s32, s32, 16
|
||||
; GCN-NEXT: v_readlane_b32 s33, v1, 63
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @last_lane_vgpr_for_fp_csr() #1 {
|
||||
@ -306,6 +314,10 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
|
||||
; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
|
||||
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-COUNT-62: v_writelane_b32 v1,
|
||||
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
@ -318,12 +330,16 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
|
||||
; GCN: v_writelane_b32 v1,
|
||||
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300
|
||||
; FLATSCR: s_add_u32 s32, s32, 12
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
|
||||
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @no_new_vgpr_for_fp_csr() #1 {
|
||||
@ -368,6 +384,10 @@ define void @realign_stack_no_fp_elim() #1 {
|
||||
|
||||
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-NEXT: v_writelane_b32 v1, s33, 2
|
||||
; GCN-NEXT: v_writelane_b32 v1, s30, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
@ -377,15 +397,20 @@ define void @realign_stack_no_fp_elim() #1 {
|
||||
; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN: ;;#ASMSTART
|
||||
; MUBUF: v_readlane_b32 s4, v1, 0
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300
|
||||
; MUBUF-NEXT: v_readlane_b32 s4, v1, 0
|
||||
; MUBUF-NEXT: v_readlane_b32 s5, v1, 1
|
||||
; FLATSCR: v_readlane_b32 s0, v1, 0
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 8
|
||||
; FLATSCR: s_add_u32 s32, s32, 12
|
||||
; FLATSCR-NEXT: v_readlane_b32 s0, v1, 0
|
||||
; FLATSCR-NEXT: v_readlane_b32 s1, v1, 1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 8
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
|
||||
; GCN-NEXT: v_readlane_b32 s33, v1, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_setpc_b64 s[4:5]
|
||||
; FLATSCR-NEXT: s_setpc_b64 s[0:1]
|
||||
define void @no_unused_non_csr_sgpr_for_fp() #1 {
|
||||
@ -645,9 +670,11 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
|
||||
; scratch VGPR to hold the offset.
|
||||
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; MUBUF: s_or_saveexec_b64 s[4:5], -1
|
||||
; MUBUF: v_mov_b32_e32 v0, 0x1008
|
||||
; MUBUF-NEXT: buffer_store_dword v39, v0, s[0:3], s32 offen ; 4-byte Folded Spill
|
||||
; MUBUF: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1008
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1008
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x100c
|
||||
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004
|
||||
; FLATSCR: v_mov_b32_e32 v0, 0
|
||||
|
@ -293,12 +293,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
|
||||
; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; GCN: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1084
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1084
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
; GCN: v_mov_b32_e32 v0, s34
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
; GCN: v_mov_b32_e32 v0, s34
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x108c
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x108c
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
%local_val = alloca i32, align 128, addrspace(5)
|
||||
store volatile i32 %b, i32 addrspace(5)* %local_val, align 128
|
||||
|
Loading…
Reference in New Issue
Block a user