1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[AMDGPU] Save fp/bp after csr saves

Saving callee-save registers happens in whole wave mode. Exec is saved
to a free register, which can be reused to save the frame pointer.
If the fp were copied into that register first, the later exec save
would overwrite it. Therefore, saving the fp needs to happen after
saving csrs.

Differential Revision: https://reviews.llvm.org/D95861
This commit is contained in:
Sebastian Neubauer 2021-02-01 16:38:50 +01:00
parent eddb3ffdfb
commit 1d5127c810
2 changed files with 73 additions and 35 deletions

View File

@ -881,39 +881,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
TargetStackID::SGPRSpill;
}
// Emit the copy if we need an FP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
.addReg(FramePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
// Emit the copy if we need a BP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
FuncInfo->SGPRForBPSaveRestoreCopy)
.addReg(BasePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
// If a copy has been emitted for FP and/or BP, Make the SGPRs
// used in the copy instructions live throughout the function.
SmallVector<MCPhysReg, 2> TempSGPRs;
if (FuncInfo->SGPRForFPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
if (FuncInfo->SGPRForBPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
if (!TempSGPRs.empty()) {
for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : TempSGPRs)
MBB.addLiveIn(Reg);
MBB.sortUniqueLiveIns();
}
}
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
@ -1007,6 +974,44 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(Spill[0].VGPR, RegState::Undef);
}
// Emit the copy if we need an FP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
FuncInfo->SGPRForFPSaveRestoreCopy)
.addReg(FramePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
// Emit the copy if we need a BP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
FuncInfo->SGPRForBPSaveRestoreCopy)
.addReg(BasePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
// If a copy has been emitted for FP and/or BP, make the SGPRs
// used in the copy instructions live throughout the function.
SmallVector<MCPhysReg, 2> TempSGPRs;
if (FuncInfo->SGPRForFPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
if (FuncInfo->SGPRForBPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
if (!TempSGPRs.empty()) {
for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : TempSGPRs)
MBB.addLiveIn(Reg);
MBB.sortUniqueLiveIns();
}
if (!LiveRegs.empty()) {
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
}
if (TRI.needsStackRealignment(MF)) {
HasFP = true;
const unsigned Alignment = MFI.getMaxAlign().value();
@ -1015,8 +1020,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
Register ScratchSPReg = findScratchNonCalleeSaveRegister(

View File

@ -597,6 +597,41 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
ret void
}
; When flat-scratch is enabled, we save the FP to s0. At the same time,
; the exec register is saved to s0 when saving CSR in the function prolog.
; Make sure that the FP save happens after restoring exec from the same
; register.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_reg:
; GCN-NOT: v_writelane_b32 v40, s33
; FLATSCR: s_or_saveexec_b64 s[0:1], -1
; FLATSCR: s_mov_b64 exec, s[0:1]
; FLATSCR: s_mov_b32 s0, s33
; FLATSCR: s_mov_b32 s33, s32
; FLATSCR: s_mov_b32 s33, s0
; FLATSCR: s_or_saveexec_b64 s[0:1], -1
; GCN-NOT: v_readlane_b32 s33, v40
; GCN: s_setpc_b64
define void @callee_need_to_spill_fp_to_reg() #1 {
; First inline asm: its clobber list names s4-s29, s39-s102 and vcc.
; s0-s3 and s30-s38 are not listed.
call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
"~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
,~{s100},~{s101},~{s102},~{s39},~{vcc}"()
; Second inline asm: its clobber list names v0-v39 only; v40 is not listed.
call void asm sideeffect "; clobber all VGPRs except CSR v40",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()
ret void
}
; If the size of the offset exceeds the MUBUF offset field we need another
; scratch VGPR to hold the offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset