GlobalISel: Implement fewerElementsVector for G_INSERT_VECTOR_ELT
Add unit tests, since AMDGPU will only trigger this for gigantic vectors and won't use the annoying odd-sized breakdown case.
commit 418515b7d0
parent 01ab206194
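The constant-index strategy implemented below reduces to piece/offset arithmetic: once the vector is split into NarrowTy pieces of NewNumElts elements each, element IdxVal lives in piece IdxVal / NewNumElts at offset IdxVal - NewNumElts * (IdxVal / NewNumElts). A minimal standalone sketch of that computation (the splitIndex helper is hypothetical, not part of the patch):

    #include <cstdint>
    #include <utility>

    // Hypothetical helper mirroring the PartIdx/NewIdx math in the patch:
    // map an element index in the original vector to (piece index, index
    // within that piece) after splitting into pieces of NewNumElts elements.
    static std::pair<int64_t, int64_t> splitIndex(int64_t IdxVal,
                                                  int64_t NewNumElts) {
      int64_t PartIdx = IdxVal / NewNumElts;
      return {PartIdx, IdxVal - NewNumElts * PartIdx};
    }

    // Example: index 7 in <8 x s16> with <2 x s16> pieces gives
    // splitIndex(7, 2) == {3, 1} -- the high element of the 4th piece,
    // which is exactly the shape the unit test at the bottom checks.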
include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -279,9 +279,9 @@ public:
   LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
                                                 unsigned TypeIdx,
                                                 LLT NarrowTy);
-  LegalizeResult fewerElementsVectorExtractVectorElt(MachineInstr &MI,
-                                                     unsigned TypeIdx,
-                                                     LLT NarrowTy);
+  LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+                                                           unsigned TypeIdx,
+                                                           LLT NarrowTy);

   LegalizeResult
   reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3608,18 +3608,24 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
 }

 LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
-                                                     unsigned TypeIdx,
-                                                     LLT NarrowVecTy) {
-  assert(TypeIdx == 1 && "not a vector type index");
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+                                                           unsigned TypeIdx,
+                                                           LLT NarrowVecTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcVec = MI.getOperand(1).getReg();
+  Register InsertVal;
+  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+  if (IsInsert)
+    InsertVal = MI.getOperand(2).getReg();
+
+  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

   // TODO: Handle total scalarization case.
   if (!NarrowVecTy.isVector())
     return UnableToLegalize;

-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcVec = MI.getOperand(1).getReg();
-  Register Idx = MI.getOperand(2).getReg();
   LLT VecTy = MRI.getType(SrcVec);

   // If the index is a constant, we can really break this down as you would
@@ -3637,8 +3643,8 @@ LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
   LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

   // Build a sequence of NarrowTy pieces in VecParts for this operand.
-  buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
-                      TargetOpcode::G_ANYEXT);
+  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+                                  TargetOpcode::G_ANYEXT);

   unsigned NewNumElts = NarrowVecTy.getNumElements();

@@ -3647,12 +3653,26 @@ LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
     auto NewIdx =
         MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

-    MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+    if (IsInsert) {
+      LLT PartTy = MRI.getType(VecParts[PartIdx]);
+
+      // Use the adjusted index to insert into one of the subvectors.
+      auto InsertPart = MIRBuilder.buildInsertVectorElement(
+          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+      VecParts[PartIdx] = InsertPart.getReg(0);
+
+      // Recombine the inserted subvector with the others to reform the result
+      // vector.
+      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+    } else {
+      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+    }
+
     MI.eraseFromParent();
     return Legalized;
   }

-  // With a variable index, we can't perform the extract in a smaller type, so
+  // With a variable index, we can't perform the operation in a smaller type, so
   // we're forced to expand this.
   //
   // TODO: We could emit a chain of compare/select to figure out which piece to
@@ -3992,7 +4012,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_BUILD_VECTOR:
     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
   case G_EXTRACT_VECTOR_ELT:
-    return fewerElementsVectorExtractVectorElt(MI, TypeIdx, NarrowTy);
+  case G_INSERT_VECTOR_ELT:
+    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
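Taken together, the constant-index path above turns one oversized insert into an unmerge, a small insert, and a re-concatenation. Schematically, for a G_INSERT_VECTOR_ELT into <8 x s16> at index 7 with NarrowTy = <2 x s16> (register names invented; the unit test at the bottom of this page checks this exact shape):

    %v0:_(<2 x s16>), %v1:_(<2 x s16>), %v2:_(<2 x s16>), %v3:_(<2 x s16>) = G_UNMERGE_VALUES %vec(<8 x s16>)
    %one:_(s64) = G_CONSTANT i64 1            ; NewIdx = 7 - 2 * (7 / 2)
    %ins:_(<2 x s16>) = G_INSERT_VECTOR_ELT %v3, %val(s16), %one(s64)
    %dst:_(<8 x s16>) = G_CONCAT_VECTORS %v0, %v1, %v2, %ins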
lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1359,7 +1359,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .clampScalar(EltTypeIdx, S32, S64)
     .clampScalar(VecTypeIdx, S32, S64)
     .clampScalar(IdxTypeIdx, S32, S32)
-    .clampMaxNumElements(1, S32, 32)
+    .clampMaxNumElements(VecTypeIdx, S32, 32)
     // TODO: Clamp elements for 64-bit vectors?
    // It should only be necessary with variable indexes.
    // As a last resort, lower to the stack
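This small change fixes which operand gets clamped: the vector operand is type index 1 for G_EXTRACT_VECTOR_ELT but type index 0 for G_INSERT_VECTOR_ELT (see the assert added in LegalizerHelper.cpp above), so a hardcoded 1 pointed at the wrong type for inserts. A sketch of how such a per-opcode ruleset is typically assembled, reconstructed from the hunk rather than quoted from AMDGPULegalizerInfo.cpp:

    // Sketch only; assumes the surrounding per-opcode setup and the local
    // S32/S64 LLT variables from the legalizer constructor.
    for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
      unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
      unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
      unsigned IdxTypeIdx = 2;

      getActionDefinitionsBuilder(Op)
          .clampScalar(EltTypeIdx, S32, S64)
          .clampScalar(VecTypeIdx, S32, S64)
          .clampScalar(IdxTypeIdx, S32, S32)
          .clampMaxNumElements(VecTypeIdx, S32, 32);
    }

With this, a <64 x i32> insertelement (as in the new test below) is first clamped down to 32-element pieces, which is what finally exercises the fewerElementsVector path implemented in this commit.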
test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll (new file, 137 lines)
@@ -0,0 +1,137 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, <64 x i32> addrspace(1)* %ptr.out) #0 {
; GCN-LABEL: v_insert_v64i32_37:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    v_lshlrev_b64 v[0:1], 8, v[0:1]
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_add_co_u32_e32 v8, vcc, v2, v0
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    v_addc_co_u32_e32 v9, vcc, v3, v1, vcc
; GCN-NEXT:    s_movk_i32 s0, 0x80
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, v8, v2
; GCN-NEXT:    s_movk_i32 s0, 0xc0
; GCN-NEXT:    v_mov_b32_e32 v65, s1
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_mov_b32_e32 v64, s0
; GCN-NEXT:    s_movk_i32 s0, 0x50
; GCN-NEXT:    v_mov_b32_e32 v69, s1
; GCN-NEXT:    v_addc_co_u32_e32 v13, vcc, v9, v3, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    v_add_co_u32_e32 v66, vcc, v4, v0
; GCN-NEXT:    v_mov_b32_e32 v68, s0
; GCN-NEXT:    s_movk_i32 s0, 0x60
; GCN-NEXT:    v_mov_b32_e32 v71, s1
; GCN-NEXT:    v_addc_co_u32_e32 v67, vcc, v5, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v70, s0
; GCN-NEXT:    s_movk_i32 s0, 0x70
; GCN-NEXT:    v_mov_b32_e32 v73, s1
; GCN-NEXT:    v_add_co_u32_e32 v74, vcc, v66, v2
; GCN-NEXT:    v_mov_b32_e32 v72, s0
; GCN-NEXT:    s_movk_i32 s0, 0x90
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_addc_co_u32_e32 v75, vcc, v67, v3, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_add_co_u32_e32 v76, vcc, v66, v0
; GCN-NEXT:    v_addc_co_u32_e32 v77, vcc, v67, v1, vcc
; GCN-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off offset:16
; GCN-NEXT:    global_load_dwordx4 v[0:3], v[12:13], off
; GCN-NEXT:    v_add_co_u32_e32 v10, vcc, 64, v8
; GCN-NEXT:    v_addc_co_u32_e32 v11, vcc, 0, v9, vcc
; GCN-NEXT:    v_add_co_u32_e32 v28, vcc, v8, v64
; GCN-NEXT:    v_addc_co_u32_e32 v29, vcc, v9, v65, vcc
; GCN-NEXT:    global_load_dwordx4 v[32:35], v[8:9], off
; GCN-NEXT:    global_load_dwordx4 v[36:39], v[8:9], off offset:16
; GCN-NEXT:    global_load_dwordx4 v[40:43], v[8:9], off offset:32
; GCN-NEXT:    global_load_dwordx4 v[44:47], v[8:9], off offset:48
; GCN-NEXT:    global_load_dwordx4 v[48:51], v[10:11], off
; GCN-NEXT:    global_load_dwordx4 v[52:55], v[10:11], off offset:16
; GCN-NEXT:    global_load_dwordx4 v[56:59], v[10:11], off offset:32
; GCN-NEXT:    global_load_dwordx4 v[60:63], v[10:11], off offset:48
; GCN-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off offset:32
; GCN-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:48
; GCN-NEXT:    global_load_dwordx4 v[16:19], v[28:29], off
; GCN-NEXT:    global_load_dwordx4 v[20:23], v[28:29], off offset:16
; GCN-NEXT:    global_load_dwordx4 v[24:27], v[28:29], off offset:32
; GCN-NEXT:    global_load_dwordx4 v[28:31], v[28:29], off offset:48
; GCN-NEXT:    s_movk_i32 s0, 0xa0
; GCN-NEXT:    s_waitcnt vmcnt(15)
; GCN-NEXT:    v_mov_b32_e32 v5, 0x3e7
; GCN-NEXT:    s_waitcnt vmcnt(14)
; GCN-NEXT:    global_store_dwordx4 v[74:75], v[0:3], off
; GCN-NEXT:    global_store_dwordx4 v[76:77], v[4:7], off
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, v66, v0
; GCN-NEXT:    s_movk_i32 s0, 0xb0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v67, v1, vcc
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, v66, v2
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v67, v3, vcc
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[2:3], v[12:15], off
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, v66, v64
; GCN-NEXT:    s_movk_i32 s0, 0xd0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v67, v65, vcc
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, v66, v2
; GCN-NEXT:    s_movk_i32 s0, 0xe0
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v67, v3, vcc
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[2:3], v[20:23], off
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, v66, v0
; GCN-NEXT:    s_movk_i32 s0, 0xf0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v67, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, v66, v2
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v67, v3, vcc
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[24:27], off
; GCN-NEXT:    s_waitcnt vmcnt(7)
; GCN-NEXT:    global_store_dwordx4 v[2:3], v[28:31], off
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, 64, v66
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v67, vcc
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[36:39], off offset:-48
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[40:43], off offset:-32
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[44:47], off offset:-16
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[48:51], off
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, v66, v68
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v67, v69, vcc
; GCN-NEXT:    global_store_dwordx4 v[66:67], v[32:35], off
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[52:55], off
; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, v66, v70
; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v67, v71, vcc
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, v66, v72
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v67, v73, vcc
; GCN-NEXT:    global_store_dwordx4 v[0:1], v[56:59], off
; GCN-NEXT:    global_store_dwordx4 v[2:3], v[60:63], off
; GCN-NEXT:    s_endpgm
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.in, i32 %id
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %gep.in
  %insert = insertelement <64 x i32> %vec, i32 999, i32 37
  %gep.out = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.out, i32 %id
  store <64 x i32> %insert, <64 x i32> addrspace(1)* %gep.out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { "amdgpu-waves-per-eu"="1,10" }
attributes #1 = { nounwind readnone speculatable willreturn }
File diff suppressed because it is too large.
unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -3051,4 +3051,85 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) {
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }

+// Test fewerElementsVector for G_INSERT_VECTOR_ELT.
+TEST_F(AArch64GISelMITest, FewerElementsInsertVectorElt) {
+  setUp();
+  if (!TM)
+    return;
+
+  DefineLegalizerInfo(A, {});
+
+  LLT P0{LLT::pointer(0, 64)};
+  LLT S64{LLT::scalar(64)};
+  LLT S16{LLT::scalar(16)};
+  LLT V2S16{LLT::vector(2, 16)};
+  LLT V3S16{LLT::vector(3, 16)};
+  LLT V8S16{LLT::vector(8, 16)};
+
+  auto Ptr0 = B.buildIntToPtr(P0, Copies[0]);
+  auto VectorV8 = B.buildLoad(V8S16, Ptr0, MachinePointerInfo(), Align(8));
+  auto Value = B.buildTrunc(S16, Copies[1]);
+
+  auto Seven = B.buildConstant(S64, 7);
+  auto InsertV8Constant7_0 =
+      B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven);
+  auto InsertV8Constant7_1 =
+      B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven);
+
+  B.buildStore(InsertV8Constant7_0, Ptr0, MachinePointerInfo(), Align(8),
+               MachineMemOperand::MOVolatile);
+  B.buildStore(InsertV8Constant7_1, Ptr0, MachinePointerInfo(), Align(8),
+               MachineMemOperand::MOVolatile);
+
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+
+  // Perform legalization.
+  B.setInsertPt(*EntryMBB, InsertV8Constant7_0->getIterator());
+
+  // This should index the high element of the 4th piece of an unmerge.
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.fewerElementsVector(*InsertV8Constant7_0, 0, V2S16));
+
+  // This case requires widening the source to a multiple of v3s16 and
+  // extracting the v8s16 result back out of the wider concatenation.
+  B.setInsertPt(*EntryMBB, InsertV8Constant7_1->getIterator());
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.fewerElementsVector(*InsertV8Constant7_1, 0, V3S16));
+
+  const auto *CheckStr = R"(
+  CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY
+  CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY
+  CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY
+  CHECK: [[PTR0:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY0]]
+  CHECK: [[VEC8:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR0]]:_(p0) :: (load 16, align 8)
+  CHECK: [[INSERT_VAL:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]]
+
+  CHECK: [[UNMERGE0:%[0-9]+]]:_(<2 x s16>), [[UNMERGE1:%[0-9]+]]:_(<2 x s16>), [[UNMERGE2:%[0-9]+]]:_(<2 x s16>), [[UNMERGE3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[VEC8]]
+  CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  CHECK: [[SUB_INSERT_7:%[0-9]+]]:_(<2 x s16>) = G_INSERT_VECTOR_ELT [[UNMERGE3]]:_, [[INSERT_VAL]]:_(s16), [[ONE]]
+  CHECK: [[INSERT_V8_7_0:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UNMERGE0]]:_(<2 x s16>), [[UNMERGE1]]:_(<2 x s16>), [[UNMERGE2]]:_(<2 x s16>), [[SUB_INSERT_7]]:_(<2 x s16>)
+
+  CHECK: [[UNMERGE1_0:%[0-9]+]]:_(s16), [[UNMERGE1_1:%[0-9]+]]:_(s16), [[UNMERGE1_2:%[0-9]+]]:_(s16), [[UNMERGE1_3:%[0-9]+]]:_(s16), [[UNMERGE1_4:%[0-9]+]]:_(s16), [[UNMERGE1_5:%[0-9]+]]:_(s16), [[UNMERGE1_6:%[0-9]+]]:_(s16), [[UNMERGE1_7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[VEC8]]:_(<8 x s16>)
+  CHECK: [[IMPDEF_S16:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  CHECK: [[BUILD0:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_0]]:_(s16), [[UNMERGE1_1]]:_(s16), [[UNMERGE1_2]]:_(s16)
+  CHECK: [[BUILD1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_3]]:_(s16), [[UNMERGE1_4]]:_(s16), [[UNMERGE1_5]]:_(s16)
+  CHECK: [[BUILD2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_6]]:_(s16), [[UNMERGE1_7]]:_(s16), [[IMPDEF_S16]]:_(s16)
+  CHECK: [[IMPDEF_V3S16:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+  CHECK: [[ONE_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  CHECK: [[SUB_INSERT_7_V3S16:%[0-9]+]]:_(<3 x s16>) = G_INSERT_VECTOR_ELT [[BUILD2]]:_, [[INSERT_VAL]]:_(s16), [[ONE_1]]
+  CHECK: [[WIDE_CONCAT:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BUILD0]]:_(<3 x s16>), [[BUILD1]]:_(<3 x s16>), [[SUB_INSERT_7_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>)
+  CHECK: [[INSERT_V8_7_1:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[WIDE_CONCAT]]:_(<24 x s16>), 0
+
+  CHECK: G_STORE [[INSERT_V8_7_0]]
+  CHECK: G_STORE [[INSERT_V8_7_1]]
+  )";
+
+  // Check
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
 } // namespace
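To run just this unit test locally, something like the following should work (build target and binary path are assumptions about the usual LLVM unittest layout, not taken from this page):

    # Assumed target/binary names for an LLVM build tree; adjust paths to taste.
    ninja GlobalISelTests
    ./unittests/CodeGen/GlobalISel/GlobalISelTests \
        --gtest_filter=AArch64GISelMITest.FewerElementsInsertVectorElt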