1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[AMDGPU] Do not reassign spilled registers

We cannot call LRM::unassign() if LRM::assign() was never called
before, these are symmetrical calls. There are two ways of
assigning a physical register to virtual, via LRM::assign() and
via VRM::assignVirt2Phys(). LRM::assign() will call the VRM to
assign the register and then update LiveIntervalUnion. Inline
spiller calls VRM directly and thus LiveIntervalUnion never gets
updated. A call to LRM::unassign() then asserts about inconsistent
liveness.

We have to note that not all callers of the InlineSpiller even
have an LRM to pass; RegAllocPBQP does not have one, so we cannot
always pass an LRM into the spiller.

The only way to get into that spiller LRE_DidCloneVirtReg() call
is from LiveRangeEdit::eliminateDeadDefs if we split an LI.

This patch refuses to reassign a LiveInterval created by a split
to work around the problem. In fact, we cannot reassign a spill
anyway, as all registers of the needed class are occupied and we
are spilling.

Fixes: SWDEV-267996

Differential Revision: https://reviews.llvm.org/D95489
This commit is contained in:
Stanislav Mekhanoshin 2021-01-27 13:02:43 -08:00
parent 612a586751
commit 348c15fbb0
4 changed files with 126 additions and 0 deletions

View File

@ -190,6 +190,14 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
return NSA_Status::FIXED;
// InlineSpiller does not call LRM::assign() after an LI split leaving
// it in an inconsistent state, so we cannot call LRM::unassign().
// See llvm bug #48911.
// Skip reassign if a register has originated from such split.
// FIXME: Remove the workaround when bug #48911 is fixed.
if (VRM->getPreSplitReg(Reg))
return NSA_Status::FIXED;
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)

View File

@ -471,6 +471,14 @@ bool GCNRegBankReassign::isReassignable(Register Reg) const {
if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
return false;
// InlineSpiller does not call LRM::assign() after an LI split leaving it
// in an inconsistent state, so we cannot call LRM::unassign().
// See llvm bug #48911.
// Skip reassign if a register has originated from such split.
// FIXME: Remove the workaround when bug #48911 is fixed.
if (VRM->getPreSplitReg(Reg))
return false;
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
Register PhysReg = VRM->getPhys(Reg);

View File

@ -0,0 +1,72 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-nsa-reassign,virtregrewriter,si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
# Tests for the amdgpu-nsa-reassign pass: registers restored from spill slots
# feed an NSA image-sample instruction. The second function pins the
# workaround that refuses to reassign registers originating from a spill.
--- |
define amdgpu_kernel void @nsa_reassign() #0 { ret void }
define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
attributes #0 = { "amdgpu-num-vgpr"="8" }
...
# The reassignable case: the CHECK expects the V8 (contiguous) form of the
# sample, i.e. the pass was able to rearrange the VGPRs — presumably
# si-shrink-instructions then converts the NSA form; confirm against pass output.
# GCN-LABEL: name: nsa_reassign
# GCN: IMAGE_SAMPLE_C_L_V1_V8_gfx10
---
name: nsa_reassign
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
body: |
bb.0:
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
S_ENDPGM 0, implicit %7
...
# The spill case: the CHECK expects the instruction to stay in its original
# V5 NSA form, i.e. the pass must NOT reassign registers whose LIs came from
# a spill/split (see llvm bug #48911 workaround).
# GCN-LABEL: do_not_reassign_spill
# GCN: IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10
---
name: do_not_reassign_spill
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
body: |
bb.0:
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
S_ENDPGM 0, implicit %7
...

View File

@ -0,0 +1,38 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-regbanks-reassign,virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
# Test for the amdgpu-regbanks-reassign pass: all inputs of the V_AND come
# from spill-slot restores, so the pass must not reassign them (llvm bug
# #48911 workaround) — the CHECK pins the original $vgpr1/$vgpr5 operands.
--- |
define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
attributes #0 = { "amdgpu-num-vgpr"="8" }
...
# Expect the V_AND to still read the originally preferred registers,
# i.e. no bank reassignment happened for the restored values.
# GCN-LABEL: do_not_reassign_spill{{$}}
# GCN: V_AND_B32_e32 killed $vgpr1, killed $vgpr5,
---
name: do_not_reassign_spill
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 6, class: vgpr_32 }
body: |
bb.0:
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
%6 = V_AND_B32_e32 %1, %5, implicit $exec
S_ENDPGM 0, implicit %6
...