[ARM] Recognize VIDUP from BUILDVECTORs of additions
This adds a pattern to recognize VIDUP from a BUILD_VECTOR of incrementing adds. This can come up from either geps or adds, and came up recently in D100550. We are just looking for a BUILD_VECTOR where each lane is an add of the first lane with N*i, where i is the lane and N is one of 1, 2, 4, or 8, supported by the VIDUP instruction.

Differential Revision: https://reviews.llvm.org/D101263
commit 5f03ae3ed7 (parent 529c8f9a7d)
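To make the matched shape concrete: for a <4 x i32> BUILD_VECTOR with lanes {x, x+2, x+4, x+6}, lane i equals lane 0 plus i*2, so the whole vector can become a single vidup.u32 with increment 2. Below is a small standalone C++ sketch of just that lane check, with invented names and plain integers standing in for SDValues (illustration only, not code from the patch):

#include <cstdint>
#include <vector>

// Sketch of the lane test: lane I must equal lane 0 plus I * N, where N is
// one of the increments the VIDUP instruction can encode.
static bool looksLikeVIDUP(const std::vector<uint32_t> &Lanes, unsigned N) {
  if (N != 1 && N != 2 && N != 4 && N != 8)
    return false;
  for (unsigned I = 1; I < Lanes.size(); I++)
    if (Lanes[I] != Lanes[0] + I * N)
      return false;
  return true;
}

int main() {
  // {10, 12, 14, 16} steps by N = 2 from lane 0, so it would match.
  return looksLikeVIDUP({10, 12, 14, 16}, 2) ? 0 : 1;
}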
@@ -1716,6 +1716,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(ARMISD::VQMOVNu)
     MAKE_CASE(ARMISD::VCVTN)
     MAKE_CASE(ARMISD::VCVTL)
+    MAKE_CASE(ARMISD::VIDUP)
     MAKE_CASE(ARMISD::VMULLs)
     MAKE_CASE(ARMISD::VMULLu)
     MAKE_CASE(ARMISD::VQDMULH)
@@ -7430,6 +7431,39 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
   return Base;
 }
 
+static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
+                                        const ARMSubtarget *ST) {
+  if (!ST->hasMVEIntegerOps())
+    return SDValue();
+
+  // We are looking for a buildvector where each element is Op[0] + i*N
+  EVT VT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Get the increment value from operand 1
+  SDValue Op1 = Op.getOperand(1);
+  if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
+      !isa<ConstantSDNode>(Op1.getOperand(1)))
+    return SDValue();
+  unsigned N = Op1.getConstantOperandVal(1);
+  if (N != 1 && N != 2 && N != 4 && N != 8)
+    return SDValue();
+
+  // Check that each other operand matches
+  for (unsigned I = 2; I < NumElts; I++) {
+    SDValue OpI = Op.getOperand(I);
+    if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
+        !isa<ConstantSDNode>(OpI.getOperand(1)) ||
+        OpI.getConstantOperandVal(1) != I * N)
+      return SDValue();
+  }
+
+  SDLoc DL(Op);
+  return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
+                     DAG.getConstant(N, DL, MVT::i32));
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.
 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -7441,6 +7475,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
     return LowerBUILD_VECTOR_i1(Op, DAG, ST);
 
+  if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
+    return R;
+
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
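One detail worth spelling out from the lowering above: DAG.getVTList(VT, MVT::i32) gives the VIDUP node two results, because the hardware instruction also writes the incremented start value back to its scalar register; this lowering only uses the vector result. A minimal standalone model of those two results, under the assumption that the writeback equals the start value plus lanes times increment (the function name is invented for illustration):

#include <array>
#include <cstdint>
#include <utility>

// Hypothetical value-level model of vidup.u32: the first result is the lane
// vector {Start, Start+N, Start+2N, Start+3N}; the second models the
// writeback of Start + 4*N to the scalar register.
static std::pair<std::array<uint32_t, 4>, uint32_t>
vidupU32Model(uint32_t Start, uint32_t N) {
  std::array<uint32_t, 4> Lanes{};
  for (unsigned I = 0; I < 4; I++)
    Lanes[I] = Start + I * N;
  return {Lanes, Start + 4 * N};
}

int main() {
  // vidup.u32 q0, r0, #2 with r0 = 10: lanes {10, 12, 14, 16}, r0 becomes 18.
  auto [Lanes, Next] = vidupU32Model(10, 2);
  return (Lanes[3] == 16 && Next == 18) ? 0 : 1;
}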
@@ -215,6 +215,9 @@ class VectorType;
               // lanes
     VCVTL,    // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
 
+    // MVE VIDUP instruction, taking a start value and increment.
+    VIDUP,
+
     // Vector multiply long:
     VMULLs, // ...signed
     VMULLu, // ...unsigned
@@ -256,6 +256,10 @@ def ARMvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;
 
+def SDTARMVIDUP : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisVT<1, i32>,
+                                       SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def ARMvidup : SDNode<"ARMISD::VIDUP", SDTARMVIDUP>;
+
 def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
 def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
@@ -5705,11 +5705,12 @@ defm MVE_VQDMLASH_qr : MVE_VQDMLAH_qr_types<"vqdmlash", 0b1, 0b1>;
 defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
 
 class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
-                list<dag> pattern=[]>
+                ValueType VT, SDPatternOperator vxdup>
   : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
           (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
           iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
-          pattern> {
+          [(set (VT MQPR:$Qd), (i32 tGPREven:$Rn),
+                (vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> {
   bits<4> Qd;
   bits<4> Rn;
   bits<2> imm;
@@ -5730,13 +5731,13 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
   let hasSideEffects = 0;
 }
 
-def MVE_VIDUPu8  : MVE_VxDUP<"vidup", "u8",  0b00, 0b0>;
-def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
-def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;
+def MVE_VIDUPu8  : MVE_VxDUP<"vidup", "u8",  0b00, 0b0, v16i8, ARMvidup>;
+def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0, v8i16, ARMvidup>;
+def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0, v4i32, ARMvidup>;
 
-def MVE_VDDUPu8  : MVE_VxDUP<"vddup", "u8",  0b00, 0b1>;
-def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
-def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
+def MVE_VDDUPu8  : MVE_VxDUP<"vddup", "u8",  0b00, 0b1, v16i8, null_frag>;
+def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1, v8i16, null_frag>;
+def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1, v4i32, null_frag>;
 
 class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
                  list<dag> pattern=[]>
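Note that the vddup definitions receive null_frag: this patch only introduces an ISD node for the incrementing form, so the decrementing instructions get no selection pattern here. Purely as illustration (hypothetical, not part of the patch), a VDDUP-shaped BUILD_VECTOR would be the mirror image, with lanes stepping downward:

#include <cstdint>
#include <vector>

// Hypothetical mirror of the VIDUP lane test for a would-be VDDUP match:
// lane I must equal lane 0 minus I * N.
static bool looksLikeVDDUP(const std::vector<uint32_t> &Lanes, unsigned N) {
  if (N != 1 && N != 2 && N != 4 && N != 8)
    return false;
  for (unsigned I = 1; I < Lanes.size(); I++)
    if (Lanes[I] != Lanes[0] - I * N)
      return false;
  return true;
}

int main() {
  // {16, 14, 12, 10} steps down by N = 2 from lane 0.
  return looksLikeVDDUP({16, 14, 12, 10}, 2) ? 0 : 1;
}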
@@ -14,55 +14,17 @@ define i32 @a(i32* readnone %b, i8* %c) {
 ; CHECK-NEXT:    it ls
 ; CHECK-NEXT:    popls {r4, pc}
 ; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
 ; CHECK-NEXT:    subs r0, r0, r1
-; CHECK-NEXT:    movs r3, #1
-; CHECK-NEXT:    add.w r2, r0, #15
-; CHECK-NEXT:    mov r12, r1
-; CHECK-NEXT:    bic r2, r2, #15
-; CHECK-NEXT:    subs r2, #16
-; CHECK-NEXT:    add.w r3, r3, r2, lsr #4
+; CHECK-NEXT:    subs r4, r0, r1
 ; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    dls lr, r3
+; CHECK-NEXT:    mov r3, r1
+; CHECK-NEXT:    dlstp.8 lr, r4
 ; CHECK-NEXT:  .LBB0_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    adds r3, r1, r2
-; CHECK-NEXT:    vctp.8 r0
-; CHECK-NEXT:    vmov.8 q0[0], r3
-; CHECK-NEXT:    adds r4, r3, #1
-; CHECK-NEXT:    vmov.8 q0[1], r4
-; CHECK-NEXT:    adds r4, r3, #2
-; CHECK-NEXT:    vmov.8 q0[2], r4
-; CHECK-NEXT:    adds r4, r3, #3
-; CHECK-NEXT:    vmov.8 q0[3], r4
-; CHECK-NEXT:    adds r4, r3, #4
-; CHECK-NEXT:    vmov.8 q0[4], r4
-; CHECK-NEXT:    adds r4, r3, #5
-; CHECK-NEXT:    vmov.8 q0[5], r4
-; CHECK-NEXT:    adds r4, r3, #6
-; CHECK-NEXT:    vmov.8 q0[6], r4
-; CHECK-NEXT:    adds r4, r3, #7
-; CHECK-NEXT:    vmov.8 q0[7], r4
-; CHECK-NEXT:    add.w r4, r3, #8
-; CHECK-NEXT:    vmov.8 q0[8], r4
-; CHECK-NEXT:    add.w r4, r3, #9
-; CHECK-NEXT:    vmov.8 q0[9], r4
-; CHECK-NEXT:    add.w r4, r3, #10
-; CHECK-NEXT:    vmov.8 q0[10], r4
-; CHECK-NEXT:    add.w r4, r3, #11
-; CHECK-NEXT:    vmov.8 q0[11], r4
-; CHECK-NEXT:    add.w r4, r3, #12
-; CHECK-NEXT:    vmov.8 q0[12], r4
-; CHECK-NEXT:    add.w r4, r3, #13
-; CHECK-NEXT:    vmov.8 q0[13], r4
-; CHECK-NEXT:    add.w r4, r3, #14
-; CHECK-NEXT:    adds r0, r1, r2
-; CHECK-NEXT:    adds r2, #16
-; CHECK-NEXT:    subs r0, #16
-; CHECK-NEXT:    vmov.8 q0[14], r4
-; CHECK-NEXT:    adds r3, #15
-; CHECK-NEXT:    vmov.8 q0[15], r3
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrbt.8 q0, [r12], #16
-; CHECK-NEXT:    le lr, .LBB0_2
+; CHECK-NEXT:    vidup.u8 q0, r0, #1
+; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_2
 ; CHECK-NEXT:  @ %bb.3: @ %while.end
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
@@ -4,11 +4,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_1(i32 %index) {
 ; CHECK-LABEL: vidup_v4i32_1:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    adds r1, r0, #2
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    adds r1, r0, #3
-; CHECK-NEXT:    adds r0, #1
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    vidup.u32 q0, r0, #1
 ; CHECK-NEXT:    bx lr
   %a1 = add i32 %index, 1
   %a2 = add i32 %index, 2
@@ -24,11 +20,7 @@ define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_1(i32 %index) {
 define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_2(i32 %index) {
 ; CHECK-LABEL: vidup_v4i32_2:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    adds r1, r0, #6
-; CHECK-NEXT:    adds r0, #2
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    vidup.u32 q0, r0, #2
 ; CHECK-NEXT:    bx lr
   %a1 = add i32 %index, 2
   %a2 = add i32 %index, 4
@@ -64,11 +56,7 @@ define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_3(i32 %index) {
 define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_4(i32 %index) {
 ; CHECK-LABEL: vidup_v4i32_4:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    add.w r1, r0, #12
-; CHECK-NEXT:    adds r0, #4
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    vidup.u32 q0, r0, #4
 ; CHECK-NEXT:    bx lr
   %a1 = add i32 %index, 4
   %a2 = add i32 %index, 8
@@ -84,11 +72,7 @@ define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_4(i32 %index) {
 define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_8(i32 %index) {
 ; CHECK-LABEL: vidup_v4i32_8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    add.w r1, r0, #16
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT:    add.w r1, r0, #24
-; CHECK-NEXT:    adds r0, #8
-; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    vidup.u32 q0, r0, #8
 ; CHECK-NEXT:    bx lr
   %a1 = add i32 %index, 8
   %a2 = add i32 %index, 16
@@ -125,21 +109,7 @@ define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_1_i(i32 %index) {
 define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_1(i16 %index) {
 ; CHECK-LABEL: vidup_v8i16_1:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.16 q0[0], r0
-; CHECK-NEXT:    adds r1, r0, #1
-; CHECK-NEXT:    vmov.16 q0[1], r1
-; CHECK-NEXT:    adds r1, r0, #2
-; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    adds r1, r0, #3
-; CHECK-NEXT:    vmov.16 q0[3], r1
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov.16 q0[4], r1
-; CHECK-NEXT:    adds r1, r0, #5
-; CHECK-NEXT:    vmov.16 q0[5], r1
-; CHECK-NEXT:    adds r1, r0, #6
-; CHECK-NEXT:    vmov.16 q0[6], r1
-; CHECK-NEXT:    adds r0, #7
-; CHECK-NEXT:    vmov.16 q0[7], r0
+; CHECK-NEXT:    vidup.u16 q0, r0, #1
 ; CHECK-NEXT:    bx lr
   %a1 = add i16 %index, 1
   %a2 = add i16 %index, 2
@@ -163,21 +133,7 @@ define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_1(i16 %index) {
 define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_2(i16 %index) {
 ; CHECK-LABEL: vidup_v8i16_2:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.16 q0[0], r0
-; CHECK-NEXT:    adds r1, r0, #2
-; CHECK-NEXT:    vmov.16 q0[1], r1
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    adds r1, r0, #6
-; CHECK-NEXT:    vmov.16 q0[3], r1
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov.16 q0[4], r1
-; CHECK-NEXT:    add.w r1, r0, #10
-; CHECK-NEXT:    vmov.16 q0[5], r1
-; CHECK-NEXT:    add.w r1, r0, #12
-; CHECK-NEXT:    vmov.16 q0[6], r1
-; CHECK-NEXT:    adds r0, #14
-; CHECK-NEXT:    vmov.16 q0[7], r0
+; CHECK-NEXT:    vidup.u16 q0, r0, #2
 ; CHECK-NEXT:    bx lr
   %a1 = add i16 %index, 2
   %a2 = add i16 %index, 4
@@ -201,21 +157,7 @@ define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_2(i16 %index) {
 define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_4(i16 %index) {
 ; CHECK-LABEL: vidup_v8i16_4:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.16 q0[0], r0
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov.16 q0[1], r1
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    add.w r1, r0, #12
-; CHECK-NEXT:    vmov.16 q0[3], r1
-; CHECK-NEXT:    add.w r1, r0, #16
-; CHECK-NEXT:    vmov.16 q0[4], r1
-; CHECK-NEXT:    add.w r1, r0, #20
-; CHECK-NEXT:    vmov.16 q0[5], r1
-; CHECK-NEXT:    add.w r1, r0, #24
-; CHECK-NEXT:    vmov.16 q0[6], r1
-; CHECK-NEXT:    adds r0, #28
-; CHECK-NEXT:    vmov.16 q0[7], r0
+; CHECK-NEXT:    vidup.u16 q0, r0, #4
 ; CHECK-NEXT:    bx lr
   %a1 = add i16 %index, 4
   %a2 = add i16 %index, 8
@@ -239,21 +181,7 @@ define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_4(i16 %index) {
 define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_8(i16 %index) {
 ; CHECK-LABEL: vidup_v8i16_8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.16 q0[0], r0
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov.16 q0[1], r1
-; CHECK-NEXT:    add.w r1, r0, #16
-; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    add.w r1, r0, #24
-; CHECK-NEXT:    vmov.16 q0[3], r1
-; CHECK-NEXT:    add.w r1, r0, #32
-; CHECK-NEXT:    vmov.16 q0[4], r1
-; CHECK-NEXT:    add.w r1, r0, #40
-; CHECK-NEXT:    vmov.16 q0[5], r1
-; CHECK-NEXT:    add.w r1, r0, #48
-; CHECK-NEXT:    vmov.16 q0[6], r1
-; CHECK-NEXT:    adds r0, #56
-; CHECK-NEXT:    vmov.16 q0[7], r0
+; CHECK-NEXT:    vidup.u16 q0, r0, #8
 ; CHECK-NEXT:    bx lr
   %a1 = add i16 %index, 8
   %a2 = add i16 %index, 16
@@ -277,37 +205,7 @@ define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_8(i16 %index) {
 define arm_aapcs_vfpcc <16 x i8> @vidup_v16i8_1(i8 %index) {
 ; CHECK-LABEL: vidup_v16i8_1:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.8 q0[0], r0
-; CHECK-NEXT:    adds r1, r0, #1
-; CHECK-NEXT:    vmov.8 q0[1], r1
-; CHECK-NEXT:    adds r1, r0, #2
-; CHECK-NEXT:    vmov.8 q0[2], r1
-; CHECK-NEXT:    adds r1, r0, #3
-; CHECK-NEXT:    vmov.8 q0[3], r1
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov.8 q0[4], r1
-; CHECK-NEXT:    adds r1, r0, #5
-; CHECK-NEXT:    vmov.8 q0[5], r1
-; CHECK-NEXT:    adds r1, r0, #6
-; CHECK-NEXT:    vmov.8 q0[6], r1
-; CHECK-NEXT:    adds r1, r0, #7
-; CHECK-NEXT:    vmov.8 q0[7], r1
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov.8 q0[8], r1
-; CHECK-NEXT:    add.w r1, r0, #9
-; CHECK-NEXT:    vmov.8 q0[9], r1
-; CHECK-NEXT:    add.w r1, r0, #10
-; CHECK-NEXT:    vmov.8 q0[10], r1
-; CHECK-NEXT:    add.w r1, r0, #11
-; CHECK-NEXT:    vmov.8 q0[11], r1
-; CHECK-NEXT:    add.w r1, r0, #12
-; CHECK-NEXT:    vmov.8 q0[12], r1
-; CHECK-NEXT:    add.w r1, r0, #13
-; CHECK-NEXT:    vmov.8 q0[13], r1
-; CHECK-NEXT:    add.w r1, r0, #14
-; CHECK-NEXT:    vmov.8 q0[14], r1
-; CHECK-NEXT:    adds r0, #15
-; CHECK-NEXT:    vmov.8 q0[15], r0
+; CHECK-NEXT:    vidup.u8 q0, r0, #1
 ; CHECK-NEXT:    bx lr
   %a1 = add i8 %index, 1
   %a2 = add i8 %index, 2
@@ -347,37 +245,7 @@ define arm_aapcs_vfpcc <16 x i8> @vidup_v16i8_1(i8 %index) {
 define arm_aapcs_vfpcc <16 x i8> @vidup_v16i8_4(i8 %index) {
 ; CHECK-LABEL: vidup_v16i8_4:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.8 q0[0], r0
-; CHECK-NEXT:    adds r1, r0, #4
-; CHECK-NEXT:    vmov.8 q0[1], r1
-; CHECK-NEXT:    add.w r1, r0, #8
-; CHECK-NEXT:    vmov.8 q0[2], r1
-; CHECK-NEXT:    add.w r1, r0, #12
-; CHECK-NEXT:    vmov.8 q0[3], r1
-; CHECK-NEXT:    add.w r1, r0, #16
-; CHECK-NEXT:    vmov.8 q0[4], r1
-; CHECK-NEXT:    add.w r1, r0, #20
-; CHECK-NEXT:    vmov.8 q0[5], r1
-; CHECK-NEXT:    add.w r1, r0, #24
-; CHECK-NEXT:    vmov.8 q0[6], r1
-; CHECK-NEXT:    add.w r1, r0, #28
-; CHECK-NEXT:    vmov.8 q0[7], r1
-; CHECK-NEXT:    add.w r1, r0, #32
-; CHECK-NEXT:    vmov.8 q0[8], r1
-; CHECK-NEXT:    add.w r1, r0, #36
-; CHECK-NEXT:    vmov.8 q0[9], r1
-; CHECK-NEXT:    add.w r1, r0, #40
-; CHECK-NEXT:    vmov.8 q0[10], r1
-; CHECK-NEXT:    add.w r1, r0, #44
-; CHECK-NEXT:    vmov.8 q0[11], r1
-; CHECK-NEXT:    add.w r1, r0, #48
-; CHECK-NEXT:    vmov.8 q0[12], r1
-; CHECK-NEXT:    add.w r1, r0, #52
-; CHECK-NEXT:    vmov.8 q0[13], r1
-; CHECK-NEXT:    add.w r1, r0, #56
-; CHECK-NEXT:    vmov.8 q0[14], r1
-; CHECK-NEXT:    adds r0, #60
-; CHECK-NEXT:    vmov.8 q0[15], r0
+; CHECK-NEXT:    vidup.u8 q0, r0, #4
 ; CHECK-NEXT:    bx lr
   %a1 = add i8 %index, 4
   %a2 = add i8 %index, 8