mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[NEON] Support VST1xN intrinsics in AArch32 mode (LLVM part)
We currently support them only in AArch64. The NEON Reference, however, says they are 'ARMv7, ARMv8' intrinsics.

Differential Revision: https://reviews.llvm.org/D47447

llvm-svn: 334361
This commit is contained in:
parent
62affa8ef9
commit
871056ae45
@ -671,6 +671,20 @@ def int_arm_neon_vst4 : Intrinsic<[],
|
||||
LLVMMatchType<1>, llvm_i32_ty],
|
||||
[IntrArgMemOnly]>;
|
||||
|
||||
def int_arm_neon_vst1x2 : Intrinsic<[],
|
||||
[llvm_anyptr_ty, llvm_anyvector_ty,
|
||||
LLVMMatchType<1>],
|
||||
[IntrArgMemOnly, NoCapture<0>]>;
|
||||
def int_arm_neon_vst1x3 : Intrinsic<[],
|
||||
[llvm_anyptr_ty, llvm_anyvector_ty,
|
||||
LLVMMatchType<1>, LLVMMatchType<1>],
|
||||
[IntrArgMemOnly, NoCapture<0>]>;
|
||||
def int_arm_neon_vst1x4 : Intrinsic<[],
|
||||
[llvm_anyptr_ty, llvm_anyvector_ty,
|
||||
LLVMMatchType<1>, LLVMMatchType<1>,
|
||||
LLVMMatchType<1>],
|
||||
[IntrArgMemOnly, NoCapture<0>]>;
|
||||
|
||||
// Vector store N-element structure from one lane.
|
||||
// Source operands are: the address, the N vectors, the lane number, and
|
||||
// the alignment.
|
||||
|
@ -284,12 +284,34 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
|
||||
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
|
||||
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
|
||||
|
||||
{ ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false},
|
||||
{ ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false},
|
||||
{ ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false},
|
||||
{ ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false},
|
||||
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
|
||||
{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
|
||||
{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
|
||||
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
|
||||
{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
|
||||
{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
|
||||
{ ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false},
|
||||
{ ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false},
|
||||
{ ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false},
|
||||
{ ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false},
|
||||
{ ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false},
|
||||
{ ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false},
|
||||
{ ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false},
|
||||
{ ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false},
|
||||
{ ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false},
|
||||
{ ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false},
|
||||
{ ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false},
|
||||
{ ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false},
|
||||
{ ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false},
|
||||
{ ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false},
|
||||
{ ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false},
|
||||
{ ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false},
|
||||
{ ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false},
|
||||
{ ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false},
|
||||
|
||||
{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
|
||||
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
|
||||
@ -465,7 +487,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
|
||||
// and register forms. Some real instructions, however, do not rely on
|
||||
// am6offset and have separate definitions for such forms. When this is the
|
||||
// case, fixed forms do not take any offset nodes, so here we skip them for
|
||||
// such intructions. Once all real and pseudo writing-back instructions are
|
||||
// such instructions. Once all real and pseudo writing-back instructions are
|
||||
// rewritten without use of am6offset nodes, this code will go away.
|
||||
const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
|
||||
if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
|
||||
@ -477,7 +499,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
|
||||
TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) {
|
||||
assert(AM6Offset.getReg() == 0 &&
|
||||
"A fixed writing-back pseudo intruction provides an offset "
|
||||
"A fixed writing-back pseudo instruction provides an offset "
|
||||
"register!");
|
||||
} else {
|
||||
MIB.add(AM6Offset);
|
||||
@ -534,9 +556,31 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
|
||||
// Copy the addrmode6 operands.
|
||||
MIB.add(MI.getOperand(OpIdx++));
|
||||
MIB.add(MI.getOperand(OpIdx++));
|
||||
// Copy the am6offset operand.
|
||||
if (TableEntry->hasWritebackOperand)
|
||||
MIB.add(MI.getOperand(OpIdx++));
|
||||
|
||||
if (TableEntry->hasWritebackOperand) {
|
||||
// TODO: The writing-back pseudo instructions we translate here are all
|
||||
// defined to take am6offset nodes that are capable of representing both fixed
|
||||
// and register forms. Some real instructions, however, do not rely on
|
||||
// am6offset and have separate definitions for such forms. When this is the
|
||||
// case, fixed forms do not take any offset nodes, so here we skip them for
|
||||
// such instructions. Once all real and pseudo writing-back instructions are
|
||||
// rewritten without use of am6offset nodes, this code will go away.
|
||||
const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
|
||||
if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
|
||||
TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
|
||||
assert(AM6Offset.getReg() == 0 &&
|
||||
"A fixed writing-back pseudo instruction provides an offset "
|
||||
"register!");
|
||||
} else {
|
||||
MIB.add(AM6Offset);
|
||||
}
|
||||
}
|
||||
|
||||
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
||||
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
|
||||
@ -1645,6 +1689,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
||||
case ARM::VST3d8Pseudo:
|
||||
case ARM::VST3d16Pseudo:
|
||||
case ARM::VST3d32Pseudo:
|
||||
case ARM::VST1d8TPseudo:
|
||||
case ARM::VST1d16TPseudo:
|
||||
case ARM::VST1d32TPseudo:
|
||||
case ARM::VST1d64TPseudo:
|
||||
case ARM::VST3d8Pseudo_UPD:
|
||||
case ARM::VST3d16Pseudo_UPD:
|
||||
@ -1663,12 +1710,31 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
||||
case ARM::VST4d8Pseudo:
|
||||
case ARM::VST4d16Pseudo:
|
||||
case ARM::VST4d32Pseudo:
|
||||
case ARM::VST1d8QPseudo:
|
||||
case ARM::VST1d16QPseudo:
|
||||
case ARM::VST1d32QPseudo:
|
||||
case ARM::VST1d64QPseudo:
|
||||
case ARM::VST4d8Pseudo_UPD:
|
||||
case ARM::VST4d16Pseudo_UPD:
|
||||
case ARM::VST4d32Pseudo_UPD:
|
||||
case ARM::VST1d64QPseudoWB_fixed:
|
||||
case ARM::VST1d64QPseudoWB_register:
|
||||
case ARM::VST1q8HighQPseudo:
|
||||
case ARM::VST1q8LowQPseudo_UPD:
|
||||
case ARM::VST1q8HighTPseudo:
|
||||
case ARM::VST1q8LowTPseudo_UPD:
|
||||
case ARM::VST1q16HighQPseudo:
|
||||
case ARM::VST1q16LowQPseudo_UPD:
|
||||
case ARM::VST1q16HighTPseudo:
|
||||
case ARM::VST1q16LowTPseudo_UPD:
|
||||
case ARM::VST1q32HighQPseudo:
|
||||
case ARM::VST1q32LowQPseudo_UPD:
|
||||
case ARM::VST1q32HighTPseudo:
|
||||
case ARM::VST1q32LowTPseudo_UPD:
|
||||
case ARM::VST1q64HighQPseudo:
|
||||
case ARM::VST1q64LowQPseudo_UPD:
|
||||
case ARM::VST1q64HighTPseudo:
|
||||
case ARM::VST1q64LowTPseudo_UPD:
|
||||
case ARM::VST4q8Pseudo_UPD:
|
||||
case ARM::VST4q16Pseudo_UPD:
|
||||
case ARM::VST4q32Pseudo_UPD:
|
||||
|
@ -1903,9 +1903,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
case MVT::v4f32:
|
||||
case MVT::v4i32: OpcodeIndex = 2; break;
|
||||
case MVT::v2f64:
|
||||
case MVT::v2i64: OpcodeIndex = 3;
|
||||
assert(NumVecs == 1 && "v2i64 type only supported for VST1");
|
||||
break;
|
||||
case MVT::v2i64: OpcodeIndex = 3; break;
|
||||
}
|
||||
|
||||
std::vector<EVT> ResTys;
|
||||
@ -3562,6 +3560,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1x2: {
|
||||
static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
|
||||
ARM::VST1q32, ARM::VST1q64 };
|
||||
static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
|
||||
ARM::VST1d16QPseudo,
|
||||
ARM::VST1d32QPseudo,
|
||||
ARM::VST1d64QPseudo };
|
||||
SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1x3: {
|
||||
static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
|
||||
ARM::VST1d16TPseudo,
|
||||
ARM::VST1d32TPseudo,
|
||||
ARM::VST1d64TPseudo };
|
||||
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
|
||||
ARM::VST1q16LowTPseudo_UPD,
|
||||
ARM::VST1q32LowTPseudo_UPD,
|
||||
ARM::VST1q64LowTPseudo_UPD };
|
||||
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
|
||||
ARM::VST1q16HighTPseudo,
|
||||
ARM::VST1q32HighTPseudo,
|
||||
ARM::VST1q64HighTPseudo };
|
||||
SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1x4: {
|
||||
static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
|
||||
ARM::VST1d16QPseudo,
|
||||
ARM::VST1d32QPseudo,
|
||||
ARM::VST1d64QPseudo };
|
||||
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
|
||||
ARM::VST1q16LowQPseudo_UPD,
|
||||
ARM::VST1q32LowQPseudo_UPD,
|
||||
ARM::VST1q64LowQPseudo_UPD };
|
||||
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
|
||||
ARM::VST1q16HighQPseudo,
|
||||
ARM::VST1q32HighQPseudo,
|
||||
ARM::VST1q64HighQPseudo };
|
||||
SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2: {
|
||||
static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
|
||||
ARM::VST2d32, ARM::VST1q64 };
|
||||
|
@ -12773,6 +12773,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case Intrinsic::arm_neon_vld3lane:
|
||||
case Intrinsic::arm_neon_vld4lane:
|
||||
case Intrinsic::arm_neon_vst1:
|
||||
case Intrinsic::arm_neon_vst1x2:
|
||||
case Intrinsic::arm_neon_vst1x3:
|
||||
case Intrinsic::arm_neon_vst1x4:
|
||||
case Intrinsic::arm_neon_vst2:
|
||||
case Intrinsic::arm_neon_vst3:
|
||||
case Intrinsic::arm_neon_vst4:
|
||||
@ -14118,6 +14121,27 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
Info.flags = MachineMemOperand::MOStore;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::arm_neon_vst1x2:
|
||||
case Intrinsic::arm_neon_vst1x3:
|
||||
case Intrinsic::arm_neon_vst1x4: {
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
// Conservatively set memVT to the entire set of vectors stored.
|
||||
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
|
||||
unsigned NumElts = 0;
|
||||
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
|
||||
Type *ArgTy = I.getArgOperand(ArgI)->getType();
|
||||
if (!ArgTy->isVectorTy())
|
||||
break;
|
||||
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
|
||||
}
|
||||
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.align = 0;
|
||||
// volatile stores with NEON intrinsics not supported
|
||||
Info.flags = MachineMemOperand::MOStore;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::arm_ldaex:
|
||||
case Intrinsic::arm_ldrex: {
|
||||
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
|
||||
|
@ -1801,10 +1801,22 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
|
||||
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
|
||||
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
|
||||
|
||||
def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
|
||||
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
|
||||
|
||||
def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
|
||||
|
||||
// ...with 4 registers
|
||||
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
|
||||
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
|
||||
@ -1844,10 +1856,22 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
|
||||
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
|
||||
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
|
||||
|
||||
def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
|
||||
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
|
||||
|
||||
def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
|
||||
|
||||
// VST2 : Vector Store (multiple 2-element structures)
|
||||
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
|
||||
InstrItinClass itin, Operand AddrMode>
|
||||
|
363
test/CodeGen/ARM/arm-vst1.ll
Normal file
@ -0,0 +1,363 @@
|
||||
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
|
||||
; RUN: -asm-verbose=false | FileCheck %s
|
||||
|
||||
; %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
|
||||
; %struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
|
||||
; %struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
|
||||
|
||||
; %struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
|
||||
; %struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
|
||||
; %struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
|
||||
|
||||
; %struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
|
||||
; %struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
|
||||
; %struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
|
||||
|
||||
; %struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
|
||||
; %struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
|
||||
; %struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
|
||||
|
||||
; %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
|
||||
; %struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
|
||||
; %struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
|
||||
|
||||
; %struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
|
||||
; %struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
|
||||
; %struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
|
||||
|
||||
; %struct.uint64x2x2_t = type { <2 x i64>, <2 x i64> }
|
||||
; %struct.uint64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
|
||||
; %struct.uint64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
|
||||
|
||||
; %struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
|
||||
; %struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
|
||||
; %struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
|
||||
|
||||
%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
|
||||
%struct.uint16x4x3_t = type { [3 x <4 x i16>] }
|
||||
%struct.uint16x4x4_t = type { [4 x <4 x i16>] }
|
||||
%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
|
||||
%struct.uint32x2x3_t = type { [3 x <2 x i32>] }
|
||||
%struct.uint32x2x4_t = type { [4 x <2 x i32>] }
|
||||
%struct.uint64x1x2_t = type { [2 x <1 x i64>] }
|
||||
%struct.uint64x1x3_t = type { [3 x <1 x i64>] }
|
||||
%struct.uint64x1x4_t = type { [4 x <1 x i64>] }
|
||||
%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
|
||||
%struct.uint8x8x3_t = type { [3 x <8 x i8>] }
|
||||
%struct.uint8x8x4_t = type { [4 x <8 x i8>] }
|
||||
%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
|
||||
%struct.uint16x8x3_t = type { [3 x <8 x i16>] }
|
||||
%struct.uint16x8x4_t = type { [4 x <8 x i16>] }
|
||||
%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
|
||||
%struct.uint32x4x3_t = type { [3 x <4 x i32>] }
|
||||
%struct.uint32x4x4_t = type { [4 x <4 x i32>] }
|
||||
%struct.uint64x2x2_t = type { [2 x <2 x i64>] }
|
||||
%struct.uint64x2x3_t = type { [3 x <2 x i64>] }
|
||||
%struct.uint64x2x4_t = type { [4 x <2 x i64>] }
|
||||
%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
|
||||
%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
|
||||
%struct.uint8x16x4_t = type { [4 x <16 x i8>] }
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>, <4 x i16>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>, <2 x i32>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>, <1 x i64>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>, <8 x i8>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>, <8 x i16>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>, <4 x i32>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>, <2 x i64>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) argmemonly nounwind
|
||||
|
||||
declare void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>) argmemonly nounwind
|
||||
declare void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) argmemonly nounwind
|
||||
|
||||
; CHECK-LABEL: test_vst1_u16_x2
|
||||
; CHECK: vst1.16 {d16, d17}, [r0:64]
|
||||
define void @test_vst1_u16_x2(i16* %a, %struct.uint16x4x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* %a, <4 x i16> %b0, <4 x i16> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u16_x3
|
||||
; CHECK: vst1.16 {d16, d17, d18}, [r0:64]
|
||||
define void @test_vst1_u16_x3(i16* %a, %struct.uint16x4x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint16x4x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* %a, <4 x i16> %b0, <4 x i16> %b1, <4 x i16> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u16_x4
|
||||
; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1_u16_x4(i16* %a, %struct.uint16x4x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint16x4x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint16x4x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* %a, <4 x i16> %b0, <4 x i16> %b1, <4 x i16> %b2, <4 x i16> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u32_x2
|
||||
; CHECK: vst1.32 {d16, d17}, [r0:64]
|
||||
define void @test_vst1_u32_x2(i32* %a, %struct.uint32x2x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x2x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* %a, <2 x i32> %b0, <2 x i32> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u32_x3
|
||||
; CHECK: vst1.32 {d16, d17, d18}, [r0:64]
|
||||
define void @test_vst1_u32_x3(i32* %a, %struct.uint32x2x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x2x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint32x2x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* %a, <2 x i32> %b0, <2 x i32> %b1, <2 x i32> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u32_x4
|
||||
; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1_u32_x4(i32* %a, %struct.uint32x2x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x2x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint32x2x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint32x2x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* %a, <2 x i32> %b0, <2 x i32> %b1, <2 x i32> %b2, <2 x i32> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u64_x2
|
||||
; CHECK: vst1.64 {d16, d17}, [r0:64]
|
||||
define void @test_vst1_u64_x2(i64* %a, %struct.uint64x1x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x1x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* %a, <1 x i64> %b0, <1 x i64> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u64_x3
|
||||
; CHECK: vst1.64 {d16, d17, d18}, [r0:64]
|
||||
define void @test_vst1_u64_x3(i64* %a, %struct.uint64x1x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x1x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint64x1x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* %a, <1 x i64> %b0, <1 x i64> %b1, <1 x i64> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u64_x4
|
||||
; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1_u64_x4(i64* %a, %struct.uint64x1x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x1x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint64x1x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint64x1x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* %a, <1 x i64> %b0, <1 x i64> %b1, <1 x i64> %b2, <1 x i64> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u8_x2
|
||||
; CHECK: vst1.8 {d16, d17}, [r0:64]
|
||||
define void @test_vst1_u8_x2(i8* %a, %struct.uint8x8x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x8x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* %a, <8 x i8> %b0, <8 x i8> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u8_x3
|
||||
; CHECK: vst1.8 {d16, d17, d18}, [r0:64]
|
||||
define void @test_vst1_u8_x3(i8* %a, %struct.uint8x8x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x8x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint8x8x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* %a, <8 x i8> %b0, <8 x i8> %b1, <8 x i8> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1_u8_x4
|
||||
; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1_u8_x4(i8* %a, %struct.uint8x8x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x8x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint8x8x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint8x8x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* %a, <8 x i8> %b0, <8 x i8> %b1, <8 x i8> %b2, <8 x i8> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u16_x2
|
||||
; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1q_u16_x2(i16* %a, %struct.uint16x8x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x8x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* %a, <8 x i16> %b0, <8 x i16> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u16_x3
|
||||
; CHECK: vst1.16 {d16, d17, d18}, [r0:64]!
|
||||
; CHECK: vst1.16 {d19, d20, d21}, [r0:64]
|
||||
define void @test_vst1q_u16_x3(i16* %a, %struct.uint16x8x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x8x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint16x8x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* %a, <8 x i16> %b0, <8 x i16> %b1, <8 x i16> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u16_x4
|
||||
; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]!
|
||||
; CHECK: vst1.16 {d20, d21, d22, d23}, [r0:256]
|
||||
define void @test_vst1q_u16_x4(i16* %a, %struct.uint16x8x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint16x8x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint16x8x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint16x8x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* %a, <8 x i16> %b0, <8 x i16> %b1, <8 x i16> %b2, <8 x i16> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u32_x2
|
||||
; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1q_u32_x2(i32* %a, %struct.uint32x4x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x4x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* %a, <4 x i32> %b0, <4 x i32> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u32_x3
|
||||
; CHECK: vst1.32 {d16, d17, d18}, [r0:64]!
|
||||
; CHECK: vst1.32 {d19, d20, d21}, [r0:64]
|
||||
define void @test_vst1q_u32_x3(i32* %a, %struct.uint32x4x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x4x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint32x4x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* %a, <4 x i32> %b0, <4 x i32> %b1, <4 x i32> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u32_x4
|
||||
; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]!
|
||||
; CHECK: vst1.32 {d20, d21, d22, d23}, [r0:256]
|
||||
define void @test_vst1q_u32_x4(i32* %a, %struct.uint32x4x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint32x4x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint32x4x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint32x4x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* %a, <4 x i32> %b0, <4 x i32> %b1, <4 x i32> %b2, <4 x i32> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u64_x2
|
||||
; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1q_u64_x2(i64* %a, %struct.uint64x2x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x2x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* %a, <2 x i64> %b0, <2 x i64> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u64_x3
|
||||
; CHECK: vst1.64 {d16, d17, d18}, [r0:64]!
|
||||
; CHECK: vst1.64 {d19, d20, d21}, [r0:64]
|
||||
define void @test_vst1q_u64_x3(i64* %a, %struct.uint64x2x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x2x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint64x2x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* %a, <2 x i64> %b0, <2 x i64> %b1, <2 x i64> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u64_x4
|
||||
; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]!
|
||||
; CHECK: vst1.64 {d20, d21, d22, d23}, [r0:256]
|
||||
define void @test_vst1q_u64_x4(i64* %a, %struct.uint64x2x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint64x2x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint64x2x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint64x2x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* %a, <2 x i64> %b0, <2 x i64> %b1, <2 x i64> %b2, <2 x i64> %b3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u8_x2
|
||||
; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
|
||||
define void @test_vst1q_u8_x2(i8* %a, %struct.uint8x16x2_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x16x2_t %b, 0, 1
|
||||
tail call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %a, <16 x i8> %b0, <16 x i8> %b1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u8_x3
|
||||
; CHECK: vst1.8 {d16, d17, d18}, [r0:64]!
|
||||
; CHECK: vst1.8 {d19, d20, d21}, [r0:64]
|
||||
define void @test_vst1q_u8_x3(i8* %a, %struct.uint8x16x3_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x16x3_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint8x16x3_t %b, 0, 2
|
||||
tail call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %a, <16 x i8> %b0, <16 x i8> %b1, <16 x i8> %b2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_vst1q_u8_x4
|
||||
; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]!
|
||||
; CHECK: vst1.8 {d20, d21, d22, d23}, [r0:256]
|
||||
define void @test_vst1q_u8_x4(i8* %a, %struct.uint8x16x4_t %b) nounwind {
|
||||
entry:
|
||||
%b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0
|
||||
%b1 = extractvalue %struct.uint8x16x4_t %b, 0, 1
|
||||
%b2 = extractvalue %struct.uint8x16x4_t %b, 0, 2
|
||||
%b3 = extractvalue %struct.uint8x16x4_t %b, 0, 3
|
||||
tail call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> %b0, <16 x i8> %b1, <16 x i8> %b2, <16 x i8> %b3)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user