mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
AMDGPU: Enable the scavenger for large frames
Another test is needed for the case where the scavenge fails, but there's another issue with that which needs an additional fix. llvm-svn: 357093
This commit is contained in:
parent
ec34fe0ec4
commit
b122727d59
@ -257,11 +257,20 @@ bool SIRegisterInfo::requiresFrameIndexScavenging(
|
||||
|
||||
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
|
||||
const MachineFunction &MF) const {
|
||||
// m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
|
||||
// create a virtual register for it during frame index elimination, so the
|
||||
// scavenger is directly needed.
|
||||
return MF.getFrameInfo().hasStackObjects() &&
|
||||
MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
if (!MFI.hasStackObjects())
|
||||
return false;
|
||||
|
||||
// The scavenger is used for large frames which may require finding a free
|
||||
// register for large offsets.
|
||||
if (!isUInt<12>(MFI.getStackSize()))
|
||||
return true;
|
||||
|
||||
// If using scalar stores, for spills, m0 is needed for the scalar store
|
||||
// offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
|
||||
// register for it during frame index elimination, so the scavenger is
|
||||
// directly needed.
|
||||
return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
|
||||
MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
|
||||
}
|
||||
|
||||
|
@ -35,9 +35,8 @@ entry:
|
||||
|
||||
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
|
||||
; 0x40000 / 64 = 4096 (for wave64)
|
||||
; CHECK: s_add_u32 s7, s7, 0x40000
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
|
||||
; CHECK: s_sub_u32 s7, s7, 0x40000
|
||||
; CHECK: s_add_u32 s6, s7, 0x40000
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
|
||||
%a = load volatile i32, i32 addrspace(5)* %aptr
|
||||
|
||||
; Force %a to spill
|
||||
@ -88,10 +87,9 @@ entry:
|
||||
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
|
||||
|
||||
; 0x3ff00 / 64 = 4092 (for wave64)
|
||||
; CHECK: s_add_u32 s7, s7, 0x3ff00
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
|
||||
; CHECK: s_sub_u32 s7, s7, 0x3ff00
|
||||
; CHECK: s_add_u32 s6, s7, 0x3ff00
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
|
||||
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
|
||||
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
|
||||
|
||||
@ -139,9 +137,8 @@ entry:
|
||||
|
||||
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
|
||||
; 0x40000 / 64 = 4096 (for wave64)
|
||||
; CHECK: s_add_u32 s5, s5, 0x40000
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
|
||||
; CHECK: s_sub_u32 s5, s5, 0x40000
|
||||
; CHECK: s_add_u32 s6, s5, 0x40000
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
|
||||
%a = load volatile i32, i32 addrspace(5)* %aptr
|
||||
|
||||
; Force %a to spill
|
||||
@ -192,10 +189,9 @@ entry:
|
||||
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
|
||||
|
||||
; 0x3ff00 / 64 = 4092 (for wave64)
|
||||
; CHECK: s_add_u32 s5, s5, 0x3ff00
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
|
||||
; CHECK: s_sub_u32 s5, s5, 0x3ff00
|
||||
; CHECK: s_add_u32 s6, s5, 0x3ff00
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
|
||||
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
|
||||
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user