mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU/GlobalISel: Don't use vector G_EXTRACT in arg lowering
Create a wider source vector, and unmerge with dead defs like the legalizer. The legalization handling for G_EXTRACT is incomplete, and it's preferable to keep everything in 32-bit pieces. We should probably start moving these functions into utils, since we have a growing number of places that do almost the same thing.
This commit is contained in:
parent
c7017bb445
commit
1356cf472c
@ -480,6 +480,43 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
|
||||
static MachineInstrBuilder mergeVectorRegsToResultRegs(
|
||||
MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
|
||||
MachineRegisterInfo &MRI = *B.getMRI();
|
||||
LLT LLTy = MRI.getType(DstRegs[0]);
|
||||
LLT PartLLT = MRI.getType(SrcRegs[0]);
|
||||
|
||||
// Deal with v3s16 split into v2s16
|
||||
LLT LCMTy = getLCMType(LLTy, PartLLT);
|
||||
if (LCMTy == LLTy) {
|
||||
// Common case where no padding is needed.
|
||||
assert(DstRegs.size() == 1);
|
||||
return B.buildConcatVectors(DstRegs[0], SrcRegs);
|
||||
}
|
||||
|
||||
const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
|
||||
Register Undef = B.buildUndef(PartLLT).getReg(0);
|
||||
|
||||
// Build vector of undefs.
|
||||
SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
|
||||
|
||||
// Replace the first sources with the real registers.
|
||||
std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
|
||||
|
||||
auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
|
||||
int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
|
||||
|
||||
SmallVector<Register, 8> PadDstRegs(NumDst);
|
||||
std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
|
||||
|
||||
// Create the excess dead defs for the unmerge.
|
||||
for (int I = DstRegs.size(); I != NumDst; ++I)
|
||||
PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
|
||||
|
||||
return B.buildUnmerge(PadDstRegs, Widened);
|
||||
}
|
||||
|
||||
// TODO: Move this to generic code
|
||||
static void packSplitRegsToOrigType(MachineIRBuilder &B,
|
||||
ArrayRef<Register> OrigRegs,
|
||||
@ -492,22 +529,9 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
|
||||
}
|
||||
|
||||
if (LLTy.isVector() && PartLLT.isVector()) {
|
||||
assert(OrigRegs.size() == 1);
|
||||
assert(LLTy.getElementType() == PartLLT.getElementType());
|
||||
|
||||
int DstElts = LLTy.getNumElements();
|
||||
int PartElts = PartLLT.getNumElements();
|
||||
if (DstElts % PartElts == 0)
|
||||
B.buildConcatVectors(OrigRegs[0], Regs);
|
||||
else {
|
||||
// Deal with v3s16 split into v2s16
|
||||
assert(PartElts == 2 && DstElts % 2 != 0);
|
||||
int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
|
||||
|
||||
LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
|
||||
auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
|
||||
B.buildExtract(OrigRegs[0], RoundedConcat, 0);
|
||||
}
|
||||
|
||||
mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -814,10 +814,11 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY3]]
|
||||
store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
|
||||
@ -848,10 +849,11 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
|
||||
; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[EXTRACT]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY4]]
|
||||
store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
|
||||
@ -898,6 +900,58 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; <2 x i16> pieces that start spilling to the stack.
|
||||
; FIXME: load of 2 would be sufficient for last piece
|
||||
define void @void_func_v65i16(<65 x i16> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v65i16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8
|
||||
; CHECK: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9
|
||||
; CHECK: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10
|
||||
; CHECK: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11
|
||||
; CHECK: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12
|
||||
; CHECK: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13
|
||||
; CHECK: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14
|
||||
; CHECK: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15
|
||||
; CHECK: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16
|
||||
; CHECK: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17
|
||||
; CHECK: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18
|
||||
; CHECK: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19
|
||||
; CHECK: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20
|
||||
; CHECK: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21
|
||||
; CHECK: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22
|
||||
; CHECK: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23
|
||||
; CHECK: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24
|
||||
; CHECK: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25
|
||||
; CHECK: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26
|
||||
; CHECK: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27
|
||||
; CHECK: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28
|
||||
; CHECK: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29
|
||||
; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
|
||||
; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31
|
||||
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store 130 into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
|
||||
; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY33]]
|
||||
store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v2f32(<2 x float> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v2f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
@ -1191,10 +1245,11 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY3]]
|
||||
store <3 x half> %arg0, <3 x half> addrspace(1)* undef
|
||||
|
@ -105,15 +105,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
|
||||
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
|
||||
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
|
||||
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
|
||||
; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
|
||||
; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; UNPACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
|
||||
; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
|
||||
; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
|
||||
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
|
||||
; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
|
||||
; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
@ -137,9 +138,10 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
|
||||
; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
|
||||
; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
|
||||
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
|
||||
; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
|
||||
; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[EXTRACT]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
|
||||
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
|
||||
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
|
||||
; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
|
Loading…
Reference in New Issue
Block a user