1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[AMDGPU] Remove unused tmp register

The temporary register is only used to compute the frame pointer.
The frame pointer is overwritten and not used in between, so we
can reuse the frame pointer for the computation, saving one register.

Differential Revision: https://reviews.llvm.org/D95865
This commit is contained in:
Sebastian Neubauer 2021-02-02 17:08:57 +01:00
parent 1d5127c810
commit e79a30d836
12 changed files with 60 additions and 65 deletions

View File

@ -1022,19 +1022,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
LiveRegs.addLiveIns(MBB);
}
Register ScratchSPReg = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
// s_add_u32 tmp_reg, s32, NumBytes
// s_and_b32 s32, tmp_reg, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
// s_add_u32 s33, s32, NumBytes
// s_and_b32 s33, s33, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
.addReg(StackPtrReg)
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
.addReg(ScratchSPReg, RegState::Kill)
.addReg(FramePtrReg, RegState::Kill)
.addImm(-Alignment * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);

View File

@ -259,9 +259,9 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s4, s32, 0x7c0
; GFX9-NEXT: s_mov_b32 s6, s33
; GFX9-NEXT: s_and_b32 s33, s4, 0xfffff800
; GFX9-NEXT: s_add_u32 s33, s32, 0x7c0
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
; GFX9-NEXT: s_add_u32 s32, s32, 0x1000
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@ -287,16 +287,16 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_add_u32 s4, s32, 0x3e0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_and_b32 s33, s4, 0xfffffc00
; GFX10-NEXT: s_add_u32 s33, s32, 0x3e0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; GFX10-NEXT: s_add_u32 s32, s32, 0x800
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: s_mov_b32 s33, s6
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)

View File

@ -8,10 +8,10 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
; GCN-LABEL: v_extract_v64i32_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0
; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
@ -330,10 +330,10 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
; GCN-LABEL: v_extract_v128i16_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0
; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
@ -657,10 +657,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN-LABEL: v_extract_v32i64_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill

View File

@ -231,9 +231,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; GCN-LABEL: func_non_entry_block_static_alloca_align64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_add_u32 s4, s32, 0xfc0
; GCN-NEXT: s_mov_b32 s8, s33
; GCN-NEXT: s_and_b32 s33, s4, 0xfffff000
; GCN-NEXT: s_add_u32 s33, s32, 0xfc0
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT: s_add_u32 s32, s32, 0x2000
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc

View File

@ -335,12 +335,12 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
; GCN: s_waitcnt
; MUBUF-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
; FLATSCR-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x1fff
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; MUBUF-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, [[SCRATCH]], 0xffffe000
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; MUBUF-NEXT: s_add_u32 s32, s32, 0x100000
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x4000
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0

View File

@ -110,9 +110,9 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; MUBUF-LABEL: func_local_stack_offset_uses_sp:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_add_u32 s4, s32, 0x7ffc0
; MUBUF-NEXT: s_mov_b32 s5, s33
; MUBUF-NEXT: s_and_b32 s33, s4, 0xfff80000
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
; MUBUF-NEXT: v_mov_b32_e32 v4, 0
@ -152,9 +152,9 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; FLATSCR-LABEL: func_local_stack_offset_uses_sp:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_add_u32 s0, s32, 0x1fff
; FLATSCR-NEXT: s_mov_b32 s2, s33
; FLATSCR-NEXT: s_and_b32 s33, s0, 0xffffe000
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
; FLATSCR-NEXT: s_mov_b32 s0, 0
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x6000

View File

@ -320,9 +320,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; MUBUF-LABEL: func_non_entry_block_static_alloca_align64:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_add_u32 s4, s32, 0xfc0
; MUBUF-NEXT: s_mov_b32 s7, s33
; MUBUF-NEXT: s_and_b32 s33, s4, 0xfffff000
; MUBUF-NEXT: s_add_u32 s33, s32, 0xfc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; MUBUF-NEXT: s_add_u32 s32, s32, 0x2000
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -354,9 +354,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; FLATSCR-LABEL: func_non_entry_block_static_alloca_align64:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_add_u32 s0, s32, 63
; FLATSCR-NEXT: s_mov_b32 s3, s33
; FLATSCR-NEXT: s_and_b32 s33, s0, 0xffffffc0
; FLATSCR-NEXT: s_add_u32 s33, s32, 63
; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x80
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc

View File

@ -29,8 +29,8 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
; CHECK: liveins: $sgpr27, $vgpr1
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@ -73,8 +73,8 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
; CHECK: liveins: $sgpr27, $vgpr1
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@ -115,8 +115,8 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
; CHECK: liveins: $sgpr27, $vgpr1
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@ -156,8 +156,8 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
; CHECK: liveins: $sgpr27, $vgpr1
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec

View File

@ -25,8 +25,8 @@ body: |
; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
; MUBUF: liveins: $sgpr27, $vgpr1
; MUBUF: $sgpr27 = frame-setup COPY $sgpr33
; MUBUF: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; MUBUF: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; MUBUF: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; MUBUF: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@ -39,8 +39,8 @@ body: |
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
; FLATSCR: liveins: $sgpr27, $vgpr1
; FLATSCR: $sgpr27 = frame-setup COPY $sgpr33
; FLATSCR: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294959104, implicit-def $scc
; FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
; FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
; FLATSCR: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec

View File

@ -24,8 +24,8 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei
; CHECK: liveins: $sgpr27, $vgpr1
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294705152, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec

View File

@ -26,8 +26,8 @@ body: |
; GFX8-LABEL: name: pei_scavenge_vgpr_spill
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX8: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
@ -42,8 +42,8 @@ body: |
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
@ -57,8 +57,8 @@ body: |
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-FLATSCR: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294959104, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
; GFX9-FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc

View File

@ -124,9 +124,9 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
}
; GCN-LABEL: {{^}}default_realign_align128:
; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
; GCN-NEXT: s_add_u32 s32, s32, 0x4000
; GCN-NOT: s33
; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
@ -193,11 +193,11 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5
; The BP value will get saved/restored in an SGPR at the prolgoue/epilogue.
; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
; GCN: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0xffc0
; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
; GCN: s_add_u32 s32, s32, 0x30000