1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

AMDGPU/GlobalISel: Ensure subreg is valid when selecting G_UNMERGE_VALUES

Fixes verifier error with SGPR unmerges with 96-bit result types.
This commit is contained in:
Matt Arsenault 2020-08-04 09:19:43 -04:00
parent 0f45fb4bbc
commit 1a2c4f8162
4 changed files with 108 additions and 6 deletions

View File

@ -567,6 +567,11 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
.addReg(SrcReg, SrcFlags, SubRegs[I]);
// Make sure the subregister index is valid for the source register.
SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;
const TargetRegisterClass *DstRC =
TRI.getConstrainedRegClassForOperand(Dst, *MRI);
if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

View File

@ -266,3 +266,74 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2
...
---
name: test_unmerge_s_v3s32_s_v12s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32
; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
; GCN: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; GCN: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5
; GCN: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
; GCN: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
; GCN: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
; GCN: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
; GCN: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]]
; GCN: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]]
; GCN: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]]
; GCN: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]]
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
%1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5
%2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8
%3:sgpr(<3 x s32>) = COPY $sgpr9_sgpr10_sgpr11
%4:sgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3
%5:sgpr(<3 x s32>), %6:sgpr(<3 x s32>), %7:sgpr(<3 x s32>), %8:sgpr(<3 x s32>) = G_UNMERGE_VALUES %4
$sgpr0_sgpr1_sgpr2 = COPY %5
$sgpr3_sgpr4_sgpr5 = COPY %6
$sgpr6_sgpr7_sgpr8 = COPY %7
$sgpr9_sgpr10_sgpr11 = COPY %8
...
---
name: test_unmerge_v_v3s32_v_v12s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; GCN: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11
; GCN: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
; GCN: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
; GCN: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
; GCN: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
; GCN: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]]
; GCN: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]]
; GCN: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]]
; GCN: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]]
%0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
%1:vgpr(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
%2:vgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1
%3:vgpr(<3 x s32>), %4:vgpr(<3 x s32>), %5:vgpr(<3 x s32>), %6:vgpr(<3 x s32>) = G_UNMERGE_VALUES %2
$vgpr0_vgpr1_vgpr2 = COPY %3
$vgpr3_vgpr4_vgpr5 = COPY %4
$vgpr6_vgpr7_vgpr8 = COPY %5
$vgpr9_vgpr10_vgpr11 = COPY %6
...

View File

@ -1125,3 +1125,29 @@ body: |
$vgpr1 = COPY %6
...
---
name: test_unmerge_v3s32_v12s32
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; CHECK-LABEL: name: test_unmerge_v3s32_v12s32
; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s32>)
; CHECK: [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY1]](<6 x s32>)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
; CHECK: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
; CHECK: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x s32>)
; CHECK: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x s32>)
%0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
%1:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
%2:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1
%3:_(<3 x s32>), %4:_(<3 x s32>), %5:_(<3 x s32>), %6:_(<3 x s32>) = G_UNMERGE_VALUES %2
$vgpr0_vgpr1_vgpr2 = COPY %3
$vgpr3_vgpr4_vgpr5 = COPY %4
$vgpr6_vgpr7_vgpr8 = COPY %5
$vgpr9_vgpr10_vgpr11 = COPY %6
...

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
; FIXME: Merge with regbankselect, which mostly overlaps when all types supported.
@ -174,7 +174,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX6: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
@ -203,7 +203,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX7: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
@ -232,7 +232,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX8: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8