1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

AMDGPU/GlobalISel: Don't use vector G_EXTRACT in arg lowering

Create a wider source vector, and unmerge with dead defs like the
legalizer. The legalization handling for G_EXTRACT is incomplete, and
it's preferrable to keep everything in 32-bit pieces.

We should probably start moving these functions into utils, since we
have a growing number of places that do almost the same thing.
This commit is contained in:
Matt Arsenault 2020-02-25 14:45:07 -05:00 committed by Matt Arsenault
parent c7017bb445
commit 1356cf472c
3 changed files with 118 additions and 37 deletions

View File

@ -480,6 +480,43 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
return true;
}
/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder mergeVectorRegsToResultRegs(
MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
MachineRegisterInfo &MRI = *B.getMRI();
LLT LLTy = MRI.getType(DstRegs[0]);
LLT PartLLT = MRI.getType(SrcRegs[0]);
// Deal with v3s16 split into v2s16
LLT LCMTy = getLCMType(LLTy, PartLLT);
if (LCMTy == LLTy) {
// Common case where no padding is needed.
assert(DstRegs.size() == 1);
return B.buildConcatVectors(DstRegs[0], SrcRegs);
}
const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
Register Undef = B.buildUndef(PartLLT).getReg(0);
// Build vector of undefs.
SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
// Replace the first sources with the real registers.
std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
SmallVector<Register, 8> PadDstRegs(NumDst);
std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
// Create the excess dead defs for the unmerge.
for (int I = DstRegs.size(); I != NumDst; ++I)
PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
return B.buildUnmerge(PadDstRegs, Widened);
}
// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> OrigRegs,
@ -492,22 +529,9 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
}
if (LLTy.isVector() && PartLLT.isVector()) {
assert(OrigRegs.size() == 1);
assert(LLTy.getElementType() == PartLLT.getElementType());
int DstElts = LLTy.getNumElements();
int PartElts = PartLLT.getNumElements();
if (DstElts % PartElts == 0)
B.buildConcatVectors(OrigRegs[0], Regs);
else {
// Deal with v3s16 split into v2s16
assert(PartElts == 2 && DstElts % 2 != 0);
int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
B.buildExtract(OrigRegs[0], RoundedConcat, 0);
}
mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
return;
}

View File

@ -814,10 +814,11 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK: S_SETPC_B64_return [[COPY3]]
store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
@ -848,10 +849,11 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[EXTRACT]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; CHECK: S_SETPC_B64_return [[COPY4]]
store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
@ -898,6 +900,58 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
ret void
}
; <2 x i16> pieces that start spilling to the stack.
; FIXME: load of 2 would be sufficient for last piece
define void @void_func_v65i16(<65 x i16> %arg0) #0 {
; CHECK-LABEL: name: void_func_v65i16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9
; CHECK: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10
; CHECK: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11
; CHECK: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12
; CHECK: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13
; CHECK: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14
; CHECK: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15
; CHECK: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16
; CHECK: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17
; CHECK: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18
; CHECK: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19
; CHECK: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20
; CHECK: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21
; CHECK: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22
; CHECK: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23
; CHECK: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24
; CHECK: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25
; CHECK: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26
; CHECK: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27
; CHECK: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28
; CHECK: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29
; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store 130 into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
; CHECK: S_SETPC_B64_return [[COPY33]]
store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
ret void
}
define void @void_func_v2f32(<2 x float> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2f32
; CHECK: bb.1 (%ir-block.0):
@ -1191,10 +1245,11 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK: S_SETPC_B64_return [[COPY3]]
store <3 x half> %arg0, <3 x half> addrspace(1)* undef

View File

@ -105,15 +105,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; UNPACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
@ -137,9 +138,10 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[EXTRACT]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void