From 348c15fbb0fd600669d286d3c26bd5317541f726 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Wed, 27 Jan 2021 13:02:43 -0800
Subject: [PATCH] [AMDGPU] Do not reassign spilled registers

We cannot call LRM::unassign() if LRM::assign() was never called
before; these are symmetrical calls. There are two ways of assigning
a physical register to a virtual one: via LRM::assign() and via
VRM::assignVirt2Phys(). LRM::assign() calls the VRM to assign the
register and then updates the LiveIntervalUnion. The inline spiller
calls the VRM directly, so the LiveIntervalUnion never gets updated
and a subsequent call to LRM::unassign() asserts about inconsistent
liveness.

Note that not all callers of the InlineSpiller even have an LRM to
pass (RegAllocPBQP does not), so we cannot always pass the LRM into
the spiller.

The only way to reach that spiller LRE_DidCloneVirtReg() callback is
from LiveRangeEdit::eliminateDeadDefs() when we split an LI.

To work around the problem, this patch refuses to reassign a
LiveInterval created by a split. In fact, we cannot reassign a spill
anyway: all registers of the needed class are occupied, which is why
we are spilling in the first place.

Fixes: SWDEV-267996

Differential Revision: https://reviews.llvm.org/D95489
---
 lib/Target/AMDGPU/GCNNSAReassign.cpp          |  8 +++
 lib/Target/AMDGPU/GCNRegBankReassign.cpp      |  8 +++
 test/CodeGen/AMDGPU/nsa-reassign.mir          | 72 +++++++++++++++++++
 .../CodeGen/AMDGPU/regbank-reassign-split.mir | 38 ++++++++++
 4 files changed, 126 insertions(+)
 create mode 100644 test/CodeGen/AMDGPU/nsa-reassign.mir
 create mode 100644 test/CodeGen/AMDGPU/regbank-reassign-split.mir

diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp
index fc7105bc15a..9f98f9ada80 100644
--- a/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -190,6 +190,14 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
       if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
         return NSA_Status::FIXED;
 
+      // InlineSpiller does not call LRM::assign() after an LI split leaving
+      // it in an inconsistent state, so we cannot call LRM::unassign().
+      // See llvm bug #48911.
+      // Skip reassign if a register has originated from such split.
+      // FIXME: Remove the workaround when bug #48911 is fixed.
+      if (VRM->getPreSplitReg(Reg))
+        return NSA_Status::FIXED;
+
       const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
 
       if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
index a12e9ab03e1..4fd76ba57ad 100644
--- a/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -471,6 +471,14 @@ bool GCNRegBankReassign::isReassignable(Register Reg) const {
   if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
     return false;
 
+  // InlineSpiller does not call LRM::assign() after an LI split leaving it
+  // in an inconsistent state, so we cannot call LRM::unassign().
+  // See llvm bug #48911.
+  // Skip reassign if a register has originated from such split.
+  // FIXME: Remove the workaround when bug #48911 is fixed.
+  if (VRM->getPreSplitReg(Reg))
+    return false;
+
   const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
 
   Register PhysReg = VRM->getPhys(Reg);
 
diff --git a/test/CodeGen/AMDGPU/nsa-reassign.mir b/test/CodeGen/AMDGPU/nsa-reassign.mir
new file mode 100644
index 00000000000..1d1815ac4ec
--- /dev/null
+++ b/test/CodeGen/AMDGPU/nsa-reassign.mir
@@ -0,0 +1,72 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-nsa-reassign,virtregrewriter,si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @nsa_reassign() #0 { ret void }
+  define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
+
+  attributes #0 = { "amdgpu-num-vgpr"="8" }
+...
+
+# GCN-LABEL: name: nsa_reassign
+# GCN: IMAGE_SAMPLE_C_L_V1_V8_gfx10
+---
+name: nsa_reassign
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
+  - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
+  - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
+  - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
+body: |
+  bb.0:
+    %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
+    S_ENDPGM 0, implicit %7
+...
+
+# GCN-LABEL: do_not_reassign_spill
+# GCN: IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10
+---
+name: do_not_reassign_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
+  - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
+  - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
+  - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
+body: |
+  bb.0:
+    %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
+    %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
+    S_ENDPGM 0, implicit %7
+...
diff --git a/test/CodeGen/AMDGPU/regbank-reassign-split.mir b/test/CodeGen/AMDGPU/regbank-reassign-split.mir
new file mode 100644
index 00000000000..8862644d226
--- /dev/null
+++ b/test/CodeGen/AMDGPU/regbank-reassign-split.mir
@@ -0,0 +1,38 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-regbanks-reassign,virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
+
+  attributes #0 = { "amdgpu-num-vgpr"="8" }
+...
+
+# GCN-LABEL: do_not_reassign_spill{{$}}
+# GCN: V_AND_B32_e32 killed $vgpr1, killed $vgpr5,
+---
+name: do_not_reassign_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
+  - { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
+  - { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
+  - { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 6, class: vgpr_32 }
+body: |
+  bb.0:
+    %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
+    S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
+    %6 = V_AND_B32_e32 %1, %5, implicit $exec
+    S_ENDPGM 0, implicit %6
+...
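Not part of the patch: a minimal standalone C++ sketch of the failure mode the commit message describes, for readers unfamiliar with the allocator data structures. ToyVRM and ToyMatrix and all of their members are hypothetical stand-ins for VirtRegMap and LiveRegMatrix/LiveIntervalUnion, not LLVM's actual API; the only point being modeled is that assign()/unassign() are symmetric, while updating the map directly leaves the union stale.

#include <cassert>
#include <map>
#include <set>

// Hypothetical stand-in for VirtRegMap: just a virtual->physical map.
struct ToyVRM {
  std::map<int, int> Virt2Phys;
  void assignVirt2Phys(int VirtReg, int PhysReg) { Virt2Phys[VirtReg] = PhysReg; }
  void clearVirt(int VirtReg) { Virt2Phys.erase(VirtReg); }
};

// Hypothetical stand-in for LiveRegMatrix plus its LiveIntervalUnion state.
struct ToyMatrix {
  ToyVRM &VRM;
  std::set<int> UnionOccupied; // physical registers the union believes are taken
  explicit ToyMatrix(ToyVRM &V) : VRM(V) {}

  // Symmetric pair: assign() updates both the map and the union.
  void assign(int VirtReg, int PhysReg) {
    VRM.assignVirt2Phys(VirtReg, PhysReg);
    UnionOccupied.insert(PhysReg);
  }
  void unassign(int VirtReg) {
    int PhysReg = VRM.Virt2Phys.at(VirtReg);
    // Fails if the union was never told about the assignment, i.e. if the
    // map was updated directly instead of going through assign().
    assert(UnionOccupied.count(PhysReg) && "inconsistent liveness");
    UnionOccupied.erase(PhysReg);
    VRM.clearVirt(VirtReg);
  }
};

int main() {
  ToyVRM VRM;
  ToyMatrix LRM(VRM);

  LRM.assign(1, 100);          // normal allocator path: both sides stay consistent
  LRM.unassign(1);             // fine, symmetric with the assign() above

  VRM.assignVirt2Phys(2, 101); // spiller-style direct map update; union not told
  LRM.unassign(2);             // trips the "inconsistent liveness" assertion
  return 0;
}

The patch sidesteps the real-world equivalent of that last call by never trying to reassign (and hence unassign) a register whose LiveInterval came from a split, which is what the VRM->getPreSplitReg(Reg) check detects.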