mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Correct prolog SP initialization logic
Having callees that will read SP is not the only reason we need to reference the stack pointer.
This commit is contained in:
parent
d688e1d62e
commit
21c0f36dab
@ -447,7 +447,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
||||
}
|
||||
assert(ScratchWaveOffsetReg);
|
||||
|
||||
if (MF.getFrameInfo().hasCalls()) {
|
||||
if (requiresStackPointerReference(MF)) {
|
||||
Register SPReg = MFI->getStackPtrOffsetReg();
|
||||
assert(SPReg != AMDGPU::SP_REG);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
|
||||
@ -1262,6 +1262,20 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
||||
return MBB.erase(I);
|
||||
}
|
||||
|
||||
/// Returns true if the frame will require a reference to the stack pointer.
|
||||
///
|
||||
/// This is the set of conditions common to setting up the stack pointer in a
|
||||
/// kernel, and for using a frame pointer in a callable function.
|
||||
///
|
||||
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
|
||||
/// references SP.
|
||||
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
|
||||
return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
|
||||
}
|
||||
|
||||
// The FP for kernels is always known 0, so we never really need to setup an
|
||||
// explicit register for it. However, DisableFramePointerElim will force us to
|
||||
// use a register for it.
|
||||
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
@ -1277,8 +1291,31 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
return MFI.getStackSize() != 0;
|
||||
}
|
||||
|
||||
return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
|
||||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
|
||||
return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
|
||||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
|
||||
MF.getTarget().Options.DisableFramePointerElim(MF);
|
||||
}
|
||||
|
||||
// This is essentially a reduced version of hasFP for entry functions. Since the
|
||||
// stack pointer is known 0 on entry to kernels, we never really need an FP
|
||||
// register. We may need to initialize the stack pointer depending on the frame
|
||||
// properties, which logically overlaps many of the cases where an ordinary
|
||||
// function would require an FP.
|
||||
bool SIFrameLowering::requiresStackPointerReference(
|
||||
const MachineFunction &MF) const {
|
||||
// Callable functions always require a stack pointer reference.
|
||||
assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
|
||||
"only expected to call this for entry points");
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
// Entry points ordinarily don't need to initialize SP. We have to set it up
|
||||
// for callees if there are any. Also note tail calls are impossible/don't
|
||||
// make any sense for kernels.
|
||||
if (MFI.hasCalls())
|
||||
return true;
|
||||
|
||||
// We still need to initialize the SP if we're doing anything weird that
|
||||
// references the SP, like variable sized stack objects.
|
||||
return frameTriviallyRequiresSP(MFI);
|
||||
}
|
||||
|
@ -71,6 +71,8 @@ private:
|
||||
|
||||
public:
|
||||
bool hasFP(const MachineFunction &MF) const override;
|
||||
|
||||
bool requiresStackPointerReference(const MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -15,6 +15,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
|
||||
; GFX9-NEXT: s_and_b32 s4, s4, -16
|
||||
; GFX9-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
|
||||
; GFX9-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
@ -26,6 +27,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
|
||||
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_add_u32 s6, s6, s9
|
||||
; GFX10-NEXT: s_movk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_mov_b32 s33, 0
|
||||
; GFX10-NEXT: s_addc_u32 s7, s7, 0
|
||||
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||||
@ -117,6 +119,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
|
||||
; GFX9-NEXT: s_and_b32 s4, s4, -16
|
||||
; GFX9-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
|
||||
; GFX9-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
@ -128,6 +131,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
|
||||
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_add_u32 s6, s6, s9
|
||||
; GFX10-NEXT: s_movk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_mov_b32 s33, 0
|
||||
; GFX10-NEXT: s_addc_u32 s7, s7, 0
|
||||
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||||
@ -219,6 +223,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
|
||||
; GFX9-NEXT: s_and_b32 s4, s4, -16
|
||||
; GFX9-NEXT: s_movk_i32 s32, 0x800
|
||||
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
|
||||
; GFX9-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX9-NEXT: s_and_b32 s4, s4, 0xfffff800
|
||||
@ -231,6 +236,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
|
||||
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_add_u32 s6, s6, s9
|
||||
; GFX10-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX10-NEXT: s_mov_b32 s33, 0
|
||||
; GFX10-NEXT: s_addc_u32 s7, s7, 0
|
||||
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||||
|
@ -18,12 +18,13 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
||||
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s9
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_movk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GCN-NEXT: s_and_b32 s6, s6, 1
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB0_3
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0xc
|
||||
@ -99,12 +100,13 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
||||
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s9
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_movk_i32 s32, 0x1000
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GCN-NEXT: s_and_b32 s6, s6, 1
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_cbranch_scc1 BB1_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
|
||||
|
@ -18,6 +18,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s9
|
||||
; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_movk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s8, 0
|
||||
@ -89,6 +90,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s9
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_movk_i32 s32, 0x1000
|
||||
; GCN-NEXT: s_mov_b32 s33, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_cmp_lg_u32 s6, 0
|
||||
|
Loading…
Reference in New Issue
Block a user