mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[ARM] Turn sext_inreg(VGetLaneu) into VGetLanes
This adds a DAG combine for converting sext_inreg of VGetLaneu into VGetLanes, providing the types match correctly. Differential Revision: https://reviews.llvm.org/D95073
This commit is contained in:
parent
7618d9f77d
commit
9248cc33ab
@ -977,6 +977,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
||||
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
|
||||
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
|
||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
@ -13985,6 +13986,20 @@ static SDValue PerformExtractEltCombine(SDNode *N,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformSignExtendInregCombine - Target-specific dag combine xforms for
/// ISD::SIGN_EXTEND_INREG.
///
/// Folds sext_inreg(VGETLANEu(Vec, Lane)) into VGETLANEs(Vec, Lane) when the
/// type being extended from (operand 1 of \p N) equals the scalar element type
/// of Vec. Returns an empty SDValue when the pattern does not match.
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue Src = N->getOperand(0);

  // Only a VGETLANEu feeding the sext_inreg can be rewritten.
  if (Src.getOpcode() != ARMISD::VGETLANEu)
    return SDValue();

  // The extension must start from exactly the vector's element type;
  // otherwise the sext_inreg is not equivalent to a VGETLANEs of that lane.
  SDValue Vec = Src.getOperand(0);
  EVT ExtendFromVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (ExtendFromVT != Vec.getValueType().getScalarType())
    return SDValue();

  // sext_inreg(VGETLANEu) -> VGETLANEs
  SDValue Lane = Src.getOperand(1);
  return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), N->getValueType(0), Vec,
                     Lane);
}
|
||||
|
||||
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
|
||||
/// ISD::VECTOR_SHUFFLE.
|
||||
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
|
||||
@ -16356,6 +16371,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
return PerformExtractEltCombine(N, DCI, Subtarget);
|
||||
case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
|
||||
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
|
||||
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
|
||||
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
|
||||
|
||||
define float @f(<4 x i16>* nocapture %in) {
|
||||
@ -64,12 +65,10 @@ define <4 x i32> @h(<4 x i8> *%in) {
|
||||
}
|
||||
|
||||
define float @i(<4 x i16>* nocapture %in) {
|
||||
; FIXME: The vmov.u + sxt can convert to a vmov.s
|
||||
; CHECK-LABEL: i:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[0]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: vmov.s16 r0, d16[0]
|
||||
; CHECK-NEXT: vmov s0, r0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s0, s0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
@ -96,12 +95,10 @@ define float @j(<8 x i8>* nocapture %in) {
|
||||
}
|
||||
|
||||
define float @k(<8 x i8>* nocapture %in) {
|
||||
; FIXME: The vmov.u + sxt can convert to a vmov.s
|
||||
; CHECK-LABEL: k:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vmov.u8 r0, d16[7]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: vmov.s8 r0, d16[7]
|
||||
; CHECK-NEXT: vmov s0, r0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s0, s0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
|
@ -154,58 +154,40 @@ entry:
|
||||
define arm_aapcs_vfpcc <8 x i16> @sdiv_i16(<8 x i16> %in1, <8 x i16> %in2) {
|
||||
; CHECK-LABEL: sdiv_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: sdiv r12, r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[2]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: vmov.u16 r4, q1[6]
|
||||
; CHECK-NEXT: sdiv r3, r2, r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: vmov.u16 r5, q0[6]
|
||||
; CHECK-NEXT: sdiv r0, r2, r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[0]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[0]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[0]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.16 q2[0], r0
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.16 q2[0], r1
|
||||
; CHECK-NEXT: sxth.w lr, r2
|
||||
; CHECK-NEXT: vmov.16 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.16 q2[2], r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q1[4]
|
||||
; CHECK-NEXT: sxth r6, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: vmov.u16 r1, q1[5]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: vmov.16 q2[3], r12
|
||||
; CHECK-NEXT: sdiv r2, r2, r3
|
||||
; CHECK-NEXT: sxth r5, r5
|
||||
; CHECK-NEXT: vmov.16 q2[4], r2
|
||||
; CHECK-NEXT: sdiv r0, r0, r1
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.16 q2[1], r1
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[2]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov.16 q2[2], r0
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[3]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[4]
|
||||
; CHECK-NEXT: vmov.16 q2[3], r0
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[4]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[5]
|
||||
; CHECK-NEXT: vmov.16 q2[4], r0
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[5]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[6]
|
||||
; CHECK-NEXT: vmov.16 q2[5], r0
|
||||
; CHECK-NEXT: sdiv r0, r5, r4
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[6]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.16 q2[6], r0
|
||||
; CHECK-NEXT: sdiv r0, r6, lr
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[7]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.16 q2[7], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%out = sdiv <8 x i16> %in1, %in2
|
||||
ret <8 x i16> %out
|
||||
@ -265,65 +247,49 @@ entry:
|
||||
define arm_aapcs_vfpcc <8 x i16> @srem_i16(<8 x i16> %in1, <8 x i16> %in2) {
|
||||
; CHECK-LABEL: srem_i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: vmov.u16 r5, q1[6]
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[6]
|
||||
; CHECK-NEXT: sxth r5, r5
|
||||
; CHECK-NEXT: sxth r6, r6
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[0]
|
||||
; CHECK-NEXT: sdiv r7, r6, r5
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[7]
|
||||
; CHECK-NEXT: sxth.w r8, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[3]
|
||||
; CHECK-NEXT: mls r12, r7, r5, r6
|
||||
; CHECK-NEXT: vmov.u16 r7, q0[7]
|
||||
; CHECK-NEXT: sxth r3, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[2]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r7, r7
|
||||
; CHECK-NEXT: sxth r4, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[5]
|
||||
; CHECK-NEXT: sdiv r6, r7, r2
|
||||
; CHECK-NEXT: mls lr, r6, r2, r7
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: sxth r1, r0
|
||||
; CHECK-NEXT: vmov.u16 r0, q1[4]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sdiv r5, r2, r0
|
||||
; CHECK-NEXT: vmov.u16 r6, q0[1]
|
||||
; CHECK-NEXT: mls r0, r5, r0, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[5]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sdiv r5, r2, r1
|
||||
; CHECK-NEXT: sxth r6, r6
|
||||
; CHECK-NEXT: mls r1, r5, r1, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sdiv r5, r2, r4
|
||||
; CHECK-NEXT: mls r2, r5, r4, r2
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[3]
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[6]
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov.s16 r1, q1[7]
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[7]
|
||||
; CHECK-NEXT: sdiv r3, r2, r1
|
||||
; CHECK-NEXT: mls lr, r3, r1, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q1[4]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[4]
|
||||
; CHECK-NEXT: sdiv r0, r3, r2
|
||||
; CHECK-NEXT: mls r2, r0, r2, r3
|
||||
; CHECK-NEXT: vmov.s16 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[5]
|
||||
; CHECK-NEXT: sdiv r1, r3, r0
|
||||
; CHECK-NEXT: mls r0, r1, r0, r3
|
||||
; CHECK-NEXT: vmov.s16 r1, q1[2]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: sdiv r4, r3, r1
|
||||
; CHECK-NEXT: mls r1, r4, r1, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q1[3]
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[3]
|
||||
; CHECK-NEXT: sdiv r5, r4, r3
|
||||
; CHECK-NEXT: mls r3, r5, r3, r4
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[0]
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: sdiv r5, r4, r8
|
||||
; CHECK-NEXT: mls r4, r5, r8, r4
|
||||
; CHECK-NEXT: vmov.u16 r5, q1[1]
|
||||
; CHECK-NEXT: sxth r5, r5
|
||||
; CHECK-NEXT: vmov.s16 r4, q1[0]
|
||||
; CHECK-NEXT: vmov.s16 r5, q0[0]
|
||||
; CHECK-NEXT: sdiv r6, r5, r4
|
||||
; CHECK-NEXT: mls r4, r6, r4, r5
|
||||
; CHECK-NEXT: vmov.s16 r6, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r5, q1[1]
|
||||
; CHECK-NEXT: sdiv r7, r6, r5
|
||||
; CHECK-NEXT: vmov.16 q0[0], r4
|
||||
; CHECK-NEXT: mls r5, r7, r5, r6
|
||||
; CHECK-NEXT: vmov.16 q0[1], r5
|
||||
; CHECK-NEXT: vmov.16 q0[2], r2
|
||||
; CHECK-NEXT: vmov.16 q0[2], r1
|
||||
; CHECK-NEXT: vmov.16 q0[3], r3
|
||||
; CHECK-NEXT: vmov.16 q0[4], r0
|
||||
; CHECK-NEXT: vmov.16 q0[5], r1
|
||||
; CHECK-NEXT: vmov.16 q0[4], r2
|
||||
; CHECK-NEXT: vmov.16 q0[5], r0
|
||||
; CHECK-NEXT: vmov.16 q0[6], r12
|
||||
; CHECK-NEXT: vmov.16 q0[7], lr
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
entry:
|
||||
%out = srem <8 x i16> %in1, %in2
|
||||
ret <8 x i16> %out
|
||||
@ -407,106 +373,72 @@ entry:
|
||||
define arm_aapcs_vfpcc <16 x i8> @sdiv_i8(<16 x i8> %in1, <16 x i8> %in2) {
|
||||
; CHECK-LABEL: sdiv_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[0]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[0]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[0]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[0]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[3]
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.8 q2[0], r0
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q2[0], r1
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-NEXT: vmov.8 q2[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[11]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[11]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sdiv r0, r0, r1
|
||||
; CHECK-NEXT: sxtb.w r12, r2
|
||||
; CHECK-NEXT: sxtb.w lr, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[4]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.8 q2[1], r1
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[2]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q2[2], r0
|
||||
; CHECK-NEXT: sdiv r0, r5, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[3]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q2[3], r0
|
||||
; CHECK-NEXT: sdiv r0, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[4]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q2[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[10]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sdiv r12, lr, r12
|
||||
; CHECK-NEXT: sdiv lr, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[9]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[8]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[7]
|
||||
; CHECK-NEXT: sdiv r1, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[7]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sdiv r4, r3, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[6]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[12]
|
||||
; CHECK-NEXT: sdiv r5, r3, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[5]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sdiv r0, r3, r0
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[5]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q2[5], r0
|
||||
; CHECK-NEXT: sxtb r7, r3
|
||||
; CHECK-NEXT: vmov.8 q2[6], r5
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[12]
|
||||
; CHECK-NEXT: vmov.8 q2[7], r4
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.8 q2[8], r1
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[13]
|
||||
; CHECK-NEXT: vmov.8 q2[9], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[13]
|
||||
; CHECK-NEXT: vmov.8 q2[10], lr
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[14]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[14]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.8 q2[11], r12
|
||||
; CHECK-NEXT: sdiv r3, r6, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.8 q2[12], r3
|
||||
; CHECK-NEXT: sdiv r1, r2, r1
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: vmov.8 q2[13], r1
|
||||
; CHECK-NEXT: sdiv r1, r4, r5
|
||||
; CHECK-NEXT: sdiv r0, r0, r7
|
||||
; CHECK-NEXT: vmov.8 q2[14], r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[6]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q2[6], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[7]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[8]
|
||||
; CHECK-NEXT: vmov.8 q2[7], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[8]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[9]
|
||||
; CHECK-NEXT: vmov.8 q2[8], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[9]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[10]
|
||||
; CHECK-NEXT: vmov.8 q2[9], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[10]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[11]
|
||||
; CHECK-NEXT: vmov.8 q2[10], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[11]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[12]
|
||||
; CHECK-NEXT: vmov.8 q2[11], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[12]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[13]
|
||||
; CHECK-NEXT: vmov.8 q2[12], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[13]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[14]
|
||||
; CHECK-NEXT: vmov.8 q2[13], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[14]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[15]
|
||||
; CHECK-NEXT: vmov.8 q2[14], r0
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[15]
|
||||
; CHECK-NEXT: sdiv r0, r1, r0
|
||||
; CHECK-NEXT: vmov.8 q2[15], r0
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%out = sdiv <16 x i8> %in1, %in2
|
||||
ret <16 x i8> %out
|
||||
@ -607,122 +539,90 @@ entry:
|
||||
define arm_aapcs_vfpcc <16 x i8> @srem_i8(<16 x i8> %in1, <16 x i8> %in2) {
|
||||
; CHECK-LABEL: srem_i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[14]
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[14]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sdiv r7, r6, r5
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[15]
|
||||
; CHECK-NEXT: mls r12, r7, r5, r6
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[15]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[13]
|
||||
; CHECK-NEXT: sxtb r7, r7
|
||||
; CHECK-NEXT: sxtb r3, r2
|
||||
; CHECK-NEXT: sdiv r6, r7, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[12]
|
||||
; CHECK-NEXT: mls lr, r6, r4, r7
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[12]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[8]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb.w r8, r0
|
||||
; CHECK-NEXT: sdiv r5, r4, r2
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[11]
|
||||
; CHECK-NEXT: mls r9, r5, r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[13]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.u8 r6, q0[0]
|
||||
; CHECK-NEXT: sdiv r5, r4, r3
|
||||
; CHECK-NEXT: sxtb r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[10]
|
||||
; CHECK-NEXT: mls r3, r5, r3, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[10]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sdiv r5, r4, r0
|
||||
; CHECK-NEXT: mls r0, r5, r0, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[11]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[14]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[14]
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r12, r2, r0, r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[15]
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls lr, r2, r0, r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[12]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[12]
|
||||
; CHECK-NEXT: sdiv r2, r1, r0
|
||||
; CHECK-NEXT: mls r8, r2, r0, r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[13]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[13]
|
||||
; CHECK-NEXT: sdiv r3, r1, r0
|
||||
; CHECK-NEXT: mls r3, r3, r0, r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[10]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[10]
|
||||
; CHECK-NEXT: sdiv r4, r1, r0
|
||||
; CHECK-NEXT: mls r0, r4, r0, r1
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[11]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[11]
|
||||
; CHECK-NEXT: sdiv r5, r4, r1
|
||||
; CHECK-NEXT: mls r1, r5, r1, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[8]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sdiv r5, r4, r8
|
||||
; CHECK-NEXT: mls r4, r5, r8, r4
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[0]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[8]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[8]
|
||||
; CHECK-NEXT: sdiv r6, r5, r4
|
||||
; CHECK-NEXT: mls r4, r6, r4, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[0]
|
||||
; CHECK-NEXT: vmov.s8 r6, q0[0]
|
||||
; CHECK-NEXT: sdiv r7, r6, r5
|
||||
; CHECK-NEXT: mls r5, r7, r5, r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r7, q0[1]
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sxtb r7, r7
|
||||
; CHECK-NEXT: vmov.8 q2[0], r5
|
||||
; CHECK-NEXT: vmov.s8 r6, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r7, q0[1]
|
||||
; CHECK-NEXT: sdiv r2, r7, r6
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q2[0], r5
|
||||
; CHECK-NEXT: mls r2, r2, r6, r7
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[2]
|
||||
; CHECK-NEXT: vmov.8 q2[1], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[2]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[2]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[3]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[3]
|
||||
; CHECK-NEXT: vmov.8 q2[2], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[3]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[3]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[4]
|
||||
; CHECK-NEXT: vmov.8 q2[3], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[4]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[5]
|
||||
; CHECK-NEXT: vmov.8 q2[4], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[5]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[5]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[6]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[6]
|
||||
; CHECK-NEXT: vmov.8 q2[5], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[6]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[6]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[7]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.8 q2[6], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[7]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[7]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[9]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[9]
|
||||
; CHECK-NEXT: vmov.8 q2[7], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[9]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.8 q2[8], r4
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[9]
|
||||
; CHECK-NEXT: sdiv r6, r5, r2
|
||||
; CHECK-NEXT: vmov.8 q2[8], r4
|
||||
; CHECK-NEXT: mls r2, r6, r2, r5
|
||||
; CHECK-NEXT: vmov.8 q2[9], r2
|
||||
; CHECK-NEXT: vmov.8 q2[10], r0
|
||||
; CHECK-NEXT: vmov.8 q2[11], r1
|
||||
; CHECK-NEXT: vmov.8 q2[12], r9
|
||||
; CHECK-NEXT: vmov.8 q2[12], r8
|
||||
; CHECK-NEXT: vmov.8 q2[13], r3
|
||||
; CHECK-NEXT: vmov.8 q2[14], r12
|
||||
; CHECK-NEXT: vmov.8 q2[15], lr
|
||||
; CHECK-NEXT: vmov q0, q2
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%out = srem <16 x i8> %in1, %in2
|
||||
ret <16 x i8> %out
|
||||
|
@ -91,10 +91,8 @@ entry:
|
||||
define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
|
||||
; CHECK-MVE-LABEL: foo_half_int16:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: sxth r1, r1
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[0]
|
||||
; CHECK-MVE-NEXT: vmov.s16 r1, q0[1]
|
||||
; CHECK-MVE-NEXT: vmov s4, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4
|
||||
; CHECK-MVE-NEXT: vmov r0, s4
|
||||
@ -102,39 +100,33 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4
|
||||
; CHECK-MVE-NEXT: vmov r1, s4
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[2]
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov s8, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[3]
|
||||
; CHECK-MVE-NEXT: vmov s8, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[4]
|
||||
; CHECK-MVE-NEXT: vmov s8, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[5]
|
||||
; CHECK-MVE-NEXT: vmov s8, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[6]
|
||||
; CHECK-MVE-NEXT: vmov s8, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-MVE-NEXT: vmov r0, s8
|
||||
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
|
||||
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-MVE-NEXT: sxth r0, r0
|
||||
; CHECK-MVE-NEXT: vmov.s16 r0, q0[7]
|
||||
; CHECK-MVE-NEXT: vmov s0, r0
|
||||
; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0
|
||||
; CHECK-MVE-NEXT: vmov r0, s0
|
||||
|
@ -178,10 +178,8 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
|
||||
; CHECK-LABEL: add_v8i16_v8i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: vmov.s16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: asrs r2, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
@ -190,11 +188,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[3]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -205,11 +201,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[4]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[5]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[5]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -220,13 +214,11 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[7]
|
||||
; CHECK-NEXT: adc.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[6]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[6]
|
||||
; CHECK-NEXT: adds r2, r2, r1
|
||||
; CHECK-NEXT: adc.w r1, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[7]
|
||||
; CHECK-NEXT: sxth r3, r0
|
||||
; CHECK-NEXT: adds r0, r2, r3
|
||||
; CHECK-NEXT: adc.w r1, r1, r3, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
@ -545,10 +537,8 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-LABEL: add_v16i8_v16i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[0]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: asrs r2, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
@ -557,11 +547,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r1, s5
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r1, r0, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[3]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -572,11 +560,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[4]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[5]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[5]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -587,11 +573,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[6]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[6]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[7]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[7]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -602,11 +586,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[9]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[9]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -617,11 +599,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[10]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[10]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[11]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[11]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -632,11 +612,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[13]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[13]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r1
|
||||
; CHECK-NEXT: asrs r0, r1, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -647,13 +625,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[15]
|
||||
; CHECK-NEXT: adc.w r0, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[14]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[14]
|
||||
; CHECK-NEXT: adds r2, r2, r1
|
||||
; CHECK-NEXT: adc.w r1, r0, r1, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[15]
|
||||
; CHECK-NEXT: sxtb r3, r0
|
||||
; CHECK-NEXT: adds r0, r2, r3
|
||||
; CHECK-NEXT: adc.w r1, r1, r3, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
@ -1051,10 +1027,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1063,11 +1037,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r4, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1078,11 +1050,9 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adds.w lr, r4, r3
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[5]
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1094,12 +1064,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[6]
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[6]
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[7]
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[7]
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
@ -1447,10 +1415,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[0]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1459,11 +1425,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: vmov r3, s4
|
||||
; CHECK-NEXT: vmov r12, s5
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[2]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[3]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r4, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1474,11 +1438,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r3, s6
|
||||
; CHECK-NEXT: adds.w lr, r4, r3
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[5]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[4]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1489,11 +1451,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[6]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[6]
|
||||
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[7]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[7]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1504,11 +1464,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[8]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[8]
|
||||
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[9]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[9]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1519,11 +1477,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[10]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[10]
|
||||
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[11]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[11]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1534,11 +1490,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[12]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[12]
|
||||
; CHECK-NEXT: adc.w r12, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[13]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[13]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: asrs r3, r4, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1550,12 +1504,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-NEXT: vmov r2, s6
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r12, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[14]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[14]
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[15]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[15]
|
||||
; CHECK-NEXT: adds r2, r2, r4
|
||||
; CHECK-NEXT: adc.w r3, r3, r4, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
|
@ -317,10 +317,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[0]
|
||||
; CHECK-NEXT: sxth r1, r1
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[0]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r1
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -338,10 +336,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -369,10 +365,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -392,10 +386,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
|
||||
; CHECK-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[6]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1050,10 +1042,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r2, r1
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r1
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[0]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[0]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
@ -1071,10 +1061,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[2]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[3]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[2]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1102,10 +1090,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[5]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1125,10 +1111,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r2, r3
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[6]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[7]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1174,10 +1158,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[9]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1197,10 +1179,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[10]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[11]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[10]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1228,10 +1208,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r0, r3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[13]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1251,10 +1229,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
|
||||
; CHECK-NEXT: adc.w r1, r1, r12
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[14]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[15]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[14]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
@ -1988,39 +1964,35 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
|
||||
; CHECK-NEXT: vmov.u16 r2, q1[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q1[1]
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[0]
|
||||
; CHECK-NEXT: vcmp.i32 ne, q2, zr
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r2, r12, #1
|
||||
; CHECK-NEXT: ubfx r3, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmrs lr, p0
|
||||
; CHECK-NEXT: ubfx r3, lr, #4, #1
|
||||
; CHECK-NEXT: rsb.w r12, r3, #0
|
||||
; CHECK-NEXT: and r3, lr, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r3, r12
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r3, r12
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
|
||||
; CHECK-NEXT: vand q2, q3, q2
|
||||
; CHECK-NEXT: vmov r2, s10
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov lr, s11
|
||||
; CHECK-NEXT: vmov r12, s11
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: adds r5, r4, r2
|
||||
; CHECK-NEXT: ubfx r4, r12, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r12, #8, #1
|
||||
; CHECK-NEXT: ubfx r4, lr, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, lr, #8, #1
|
||||
; CHECK-NEXT: rsb.w r4, r4, #0
|
||||
; CHECK-NEXT: rsb.w r2, r2, #0
|
||||
; CHECK-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[2]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[2]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2048,10 +2020,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[5]
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[4]
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[5]
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[4]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2071,10 +2041,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b,
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r5, r4
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u16 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.u16 r4, q0[6]
|
||||
; CHECK-NEXT: sxth r5, r5
|
||||
; CHECK-NEXT: sxth r4, r4
|
||||
; CHECK-NEXT: vmov.s16 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.s16 r4, q0[6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
|
||||
; CHECK-NEXT: asrs r5, r5, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2611,39 +2579,35 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: vmov.u16 r2, q4[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q4[1]
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r3, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[0]
|
||||
; CHECK-NEXT: vcmp.i32 ne, q5, zr
|
||||
; CHECK-NEXT: vmrs r12, p0
|
||||
; CHECK-NEXT: and r2, r12, #1
|
||||
; CHECK-NEXT: ubfx r3, r12, #4, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmrs lr, p0
|
||||
; CHECK-NEXT: ubfx r3, lr, #4, #1
|
||||
; CHECK-NEXT: rsb.w r12, r3, #0
|
||||
; CHECK-NEXT: and r3, lr, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[0]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r3, r12
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r3, r12
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[1]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
|
||||
; CHECK-NEXT: vand q5, q6, q5
|
||||
; CHECK-NEXT: vmov r2, s22
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: vmov lr, s23
|
||||
; CHECK-NEXT: vmov r12, s23
|
||||
; CHECK-NEXT: vmov r3, s21
|
||||
; CHECK-NEXT: adds r5, r4, r2
|
||||
; CHECK-NEXT: ubfx r4, r12, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r12, #8, #1
|
||||
; CHECK-NEXT: ubfx r4, lr, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, lr, #8, #1
|
||||
; CHECK-NEXT: rsb.w r4, r4, #0
|
||||
; CHECK-NEXT: rsb.w r2, r2, #0
|
||||
; CHECK-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r2, r4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[2]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[3]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[2]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2671,10 +2635,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r2, r4
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[5]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[5]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[4]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2694,10 +2656,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r5, r4
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[6]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[7]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[6]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r4, r5
|
||||
; CHECK-NEXT: asrs r5, r5, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2743,10 +2703,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[8]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[9]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[8]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2766,10 +2724,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r5, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[10]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[11]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[10]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r5
|
||||
; CHECK-NEXT: asrs r5, r5, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2797,10 +2753,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r2, r4
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[12]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[13]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[12]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
@ -2820,10 +2774,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b
|
||||
; CHECK-NEXT: adc.w r3, r3, r12
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r5, r4
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[15]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[14]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[15]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[14]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r4, r5
|
||||
; CHECK-NEXT: asrs r5, r5, #31
|
||||
; CHECK-NEXT: asrs r4, r4, #31
|
||||
|
@ -737,14 +737,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[0]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[0]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[0]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[0]
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r0
|
||||
@ -753,15 +749,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r0, s9
|
||||
; CHECK-NEXT: adds.w lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[3]
|
||||
; CHECK-NEXT: adc.w r12, r0, r1
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[3]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[2]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[3]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[2]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[2]
|
||||
; CHECK-NEXT: smull r1, r3, r3, r1
|
||||
; CHECK-NEXT: smull r0, r2, r2, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r1
|
||||
@ -772,15 +764,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds.w lr, r1, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[5]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[5]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[5]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[5]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[4]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[4]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
@ -791,15 +779,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds.w lr, r1, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[7]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[6]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[7]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[6]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[6]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
@ -810,15 +794,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds.w lr, r1, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[9]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[9]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[8]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[9]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[8]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[8]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
@ -829,15 +809,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds.w lr, r1, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[11]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[11]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[10]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[10]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[11]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[10]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[10]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
@ -848,15 +824,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: adds.w r1, r1, lr
|
||||
; CHECK-NEXT: adc.w r0, r0, r12
|
||||
; CHECK-NEXT: adds.w lr, r1, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[13]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[13]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[12]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[13]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[12]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[12]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
|
||||
@ -868,15 +840,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: vmov r0, s10
|
||||
; CHECK-NEXT: adds r0, r0, r1
|
||||
; CHECK-NEXT: adc.w r1, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[14]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[14]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[14]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[14]
|
||||
; CHECK-NEXT: smlal r0, r1, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[15]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[15]
|
||||
; CHECK-NEXT: smlal r0, r1, r3, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
@ -1690,20 +1658,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[1]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[1]
|
||||
; CHECK-NEXT: smull r12, r3, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[0]
|
||||
; CHECK-NEXT: sxtb.w lr, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[0]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[2]
|
||||
; CHECK-NEXT: vmov.s8 lr, q1[0]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[0]
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[2]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[2]
|
||||
; CHECK-NEXT: smull r2, lr, r2, lr
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[2]
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r12
|
||||
; CHECK-NEXT: smull r4, r5, r5, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], lr, r3
|
||||
@ -1711,11 +1673,9 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: vmov r2, s8
|
||||
; CHECK-NEXT: vmov r12, s9
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[3]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[3]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[3]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[3]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r5, r3
|
||||
@ -1725,15 +1685,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[5]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[5]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[4]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[4]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[5]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[4]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[4]
|
||||
; CHECK-NEXT: smull r3, r4, r4, r3
|
||||
; CHECK-NEXT: smull r2, r5, r5, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
|
||||
@ -1744,15 +1700,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[7]
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[7]
|
||||
; CHECK-NEXT: adc.w r12, r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[7]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[6]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[6]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[7]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[6]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[6]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
|
||||
@ -1763,15 +1715,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[9]
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[9]
|
||||
; CHECK-NEXT: adc.w r12, r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[9]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[8]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[9]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[8]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
|
||||
@ -1782,15 +1730,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[11]
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[11]
|
||||
; CHECK-NEXT: adc.w r12, r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[11]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[10]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[10]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[11]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[10]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[10]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
|
||||
@ -1801,15 +1745,11 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w lr, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[13]
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[13]
|
||||
; CHECK-NEXT: adc.w r12, r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[13]
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[12]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[13]
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[12]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
|
||||
@ -1820,16 +1760,12 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds r3, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[14]
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[14]
|
||||
; CHECK-NEXT: adcs r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[14]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[14]
|
||||
; CHECK-NEXT: smlal r3, r2, r4, r5
|
||||
; CHECK-NEXT: vmov.u8 r5, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[15]
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[15]
|
||||
; CHECK-NEXT: smlal r3, r2, r4, r5
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
|
@ -1127,11 +1127,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: vcmp.i8 eq, q2, zr
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: vmov.i8 q3, #0xff
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[0]
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[0]
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[4]
|
||||
; CHECK-NEXT: vmov.u8 r0, q4[0]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[4]
|
||||
; CHECK-NEXT: vmov.16 q5[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q4[1]
|
||||
; CHECK-NEXT: vmov.16 q5[1], r0
|
||||
@ -1147,7 +1146,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: vmov.16 q5[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q4[7]
|
||||
; CHECK-NEXT: vmov.16 q5[7], r0
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vpsel q5, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r0, q5[2]
|
||||
@ -1164,13 +1162,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r1
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r2, r1
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[1]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[1]
|
||||
; CHECK-NEXT: smull r1, r12, r2, r1
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[0]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[0]
|
||||
; CHECK-NEXT: smull r2, r3, r2, r3
|
||||
; CHECK-NEXT: vmov q7[2], q7[0], r2, r1
|
||||
; CHECK-NEXT: vmov q7[3], q7[1], r3, r12
|
||||
@ -1184,17 +1179,13 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: ubfx r0, r0, #8, #1
|
||||
; CHECK-NEXT: rsb.w r3, r3, #0
|
||||
; CHECK-NEXT: rsb.w r0, r0, #0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r0, r3
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r0, r3
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[2]
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[3]
|
||||
; CHECK-NEXT: vmov.u8 r2, q0[2]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[2]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[3]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[3]
|
||||
; CHECK-NEXT: smull r0, r3, r3, r0
|
||||
; CHECK-NEXT: smull r1, r2, r2, r1
|
||||
; CHECK-NEXT: vmov q7[2], q7[0], r1, r0
|
||||
@ -1207,15 +1198,14 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adc.w r2, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s26
|
||||
; CHECK-NEXT: adds.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r1, q1[4]
|
||||
; CHECK-NEXT: vmov.s8 r1, q1[4]
|
||||
; CHECK-NEXT: adc.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r2, q5[6]
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[4]
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: smull r1, r4, r4, r1
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q5[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q5[5]
|
||||
; CHECK-NEXT: smull r1, r4, r4, r1
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r3, r2
|
||||
; CHECK-NEXT: vcmp.i32 ne, q6, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
@ -1225,10 +1215,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r0, r3
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r0, r3
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[5]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[5]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[5]
|
||||
; CHECK-NEXT: smull r0, r3, r3, r0
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r1, r0
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r3
|
||||
@ -1240,23 +1228,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adds.w r1, r1, r12
|
||||
; CHECK-NEXT: adc.w r0, r0, lr
|
||||
; CHECK-NEXT: adds r1, r1, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[6]
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[6]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: ubfx r3, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[6]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r2, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[7]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r4, r0, r4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[7]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r0, r2
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r3
|
||||
; CHECK-NEXT: vand q5, q6, q5
|
||||
@ -1267,7 +1251,7 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adc.w r2, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s22
|
||||
; CHECK-NEXT: adds.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[8]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[8]
|
||||
; CHECK-NEXT: adc.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[8]
|
||||
; CHECK-NEXT: vmov.16 q5[0], r2
|
||||
@ -1285,17 +1269,15 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: vmov.16 q5[6], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[15]
|
||||
; CHECK-NEXT: vmov.16 q5[7], r2
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[8]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[8]
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vpsel q2, q3, q2
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[2]
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[0]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[3]
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[1]
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
@ -1305,10 +1287,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r3
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r4, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[9]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[9]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[9]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[9]
|
||||
; CHECK-NEXT: smull r3, r4, r4, r3
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r0, r3
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r1, r4
|
||||
@ -1320,23 +1300,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adds.w r1, r1, r12
|
||||
; CHECK-NEXT: adc.w r0, r0, lr
|
||||
; CHECK-NEXT: adds r1, r1, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[10]
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[10]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: ubfx r3, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[10]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[11]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[11]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r4, r0, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[11]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[11]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r0, r2
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r4, r3
|
||||
; CHECK-NEXT: vand q3, q4, q3
|
||||
@ -1347,19 +1323,17 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adc.w r2, r12, r0
|
||||
; CHECK-NEXT: vmov r0, s14
|
||||
; CHECK-NEXT: adds.w r12, r1, r0
|
||||
; CHECK-NEXT: vmov.u8 r0, q1[12]
|
||||
; CHECK-NEXT: vmov.s8 r0, q1[12]
|
||||
; CHECK-NEXT: adc.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[6]
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[4]
|
||||
; CHECK-NEXT: vmov.u8 r1, q0[12]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[12]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[7]
|
||||
; CHECK-NEXT: vmov.u16 r3, q2[5]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: smull r0, r1, r1, r0
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: and r4, r2, #1
|
||||
; CHECK-NEXT: ubfx r3, r2, #4, #1
|
||||
@ -1367,10 +1341,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r3
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r4, r3
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[13]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[13]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[13]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[13]
|
||||
; CHECK-NEXT: smull r3, r4, r4, r3
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r0, r3
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r1, r4
|
||||
@ -1382,23 +1354,19 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y, <1
|
||||
; CHECK-NEXT: adds.w r1, r1, r12
|
||||
; CHECK-NEXT: adc.w r0, r0, lr
|
||||
; CHECK-NEXT: adds r1, r1, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[14]
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[14]
|
||||
; CHECK-NEXT: adc.w r12, r0, r3
|
||||
; CHECK-NEXT: ubfx r3, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[14]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[15]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: smull r0, r4, r0, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[15]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r3
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
@ -2637,11 +2605,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: vcmp.i8 eq, q2, zr
|
||||
; CHECK-NEXT: vmov.i8 q2, #0x0
|
||||
; CHECK-NEXT: vmov.i8 q3, #0xff
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[0]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[0]
|
||||
; CHECK-NEXT: vpsel q4, q3, q2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[2]
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[0]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[2]
|
||||
; CHECK-NEXT: vmov.16 q5[0], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[1]
|
||||
; CHECK-NEXT: vmov.16 q5[1], r2
|
||||
@ -2657,7 +2624,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: vmov.16 q5[6], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[7]
|
||||
; CHECK-NEXT: vmov.16 q5[7], r2
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vpsel q5, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r2, q5[2]
|
||||
@ -2674,13 +2640,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: rsbs r3, r3, #0
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r3
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r2, r3
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[1]
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[1]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[1]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[1]
|
||||
; CHECK-NEXT: smull r2, lr, r3, r2
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[0]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[0]
|
||||
; CHECK-NEXT: smull r3, r4, r4, r3
|
||||
; CHECK-NEXT: vmov q7[2], q7[0], r3, r2
|
||||
; CHECK-NEXT: vmov q7[3], q7[1], r4, lr
|
||||
@ -2696,223 +2659,196 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y
|
||||
; CHECK-NEXT: rsb.w r2, r2, #0
|
||||
; CHECK-NEXT: adc.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r3, q1[2]
|
||||
; CHECK-NEXT: vmov.s8 r3, q1[2]
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r2, r4
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[3]
|
||||
; CHECK-NEXT: vmov.u8 r4, q0[3]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: smull r2, r4, r4, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[3]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[3]
|
||||
; CHECK-NEXT: smull r3, r5, r5, r3
|
||||
; CHECK-NEXT: smull r2, r4, r4, r2
|
||||
; CHECK-NEXT: vmov q7[2], q7[0], r3, r2
|
||||
; CHECK-NEXT: vmov q7[3], q7[1], r5, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[4]
|
||||
; CHECK-NEXT: vand q6, q7, q6
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov r3, s24
|
||||
; CHECK-NEXT: vmov r2, s25
|
||||
; CHECK-NEXT: vmov r5, s26
|
||||
; CHECK-NEXT: vmov r5, s27
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: vmov r6, s27
|
||||
; CHECK-NEXT: adc.w r2, r2, lr
|
||||
; CHECK-NEXT: adds.w r12, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[4]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: adc.w lr, r2, r6
|
||||
; CHECK-NEXT: vmov.u16 r2, q5[6]
|
||||
; CHECK-NEXT: vmov.u16 r6, q5[4]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r6, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q5[7]
|
||||
; CHECK-NEXT: vmov.u16 r6, q5[5]
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r6, r2
|
||||
; CHECK-NEXT: adc.w r6, lr, r2
|
||||
; CHECK-NEXT: vmov r2, s26
|
||||
; CHECK-NEXT: adds.w r12, r3, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[4]
|
||||
; CHECK-NEXT: adc.w lr, r6, r5
|
||||
; CHECK-NEXT: vmov.u16 r6, q5[6]
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[4]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r5, r6
|
||||
; CHECK-NEXT: vmov.u16 r6, q5[7]
|
||||
; CHECK-NEXT: vmov.u16 r5, q5[5]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r5, r6
|
||||
; CHECK-NEXT: vcmp.i32 ne, q6, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: and r5, r2, #1
|
||||
; CHECK-NEXT: ubfx r6, r2, #4, #1
|
||||
; CHECK-NEXT: vmrs r6, p0
|
||||
; CHECK-NEXT: and r4, r6, #1
|
||||
; CHECK-NEXT: ubfx r5, r6, #4, #1
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r5, r6
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r5, r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q1[5]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[5]
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: smull r6, r5, r5, r6
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r3, r6
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r4, r5
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r4, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[5]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[5]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r5
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r3, r4
|
||||
; CHECK-NEXT: vand q5, q6, q5
|
||||
; CHECK-NEXT: vmov r4, s20
|
||||
; CHECK-NEXT: vmov r3, s21
|
||||
; CHECK-NEXT: vmov r5, s23
|
||||
; CHECK-NEXT: adds.w r6, r12, r4
|
||||
; CHECK-NEXT: vmov r4, s22
|
||||
; CHECK-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-NEXT: adds r6, r6, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[6]
|
||||
; CHECK-NEXT: adc.w r12, r3, r5
|
||||
; CHECK-NEXT: ubfx r5, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r2, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[7]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[7]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: smull r2, r5, r5, r2
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r3, r2
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[8]
|
||||
; CHECK-NEXT: vand q5, q6, q5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov r3, s20
|
||||
; CHECK-NEXT: vmov r2, s21
|
||||
; CHECK-NEXT: vmov r5, s22
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: vmov r6, s23
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w r12, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[8]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: adc.w lr, r2, r6
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[8]
|
||||
; CHECK-NEXT: vmov.16 q5[0], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[9]
|
||||
; CHECK-NEXT: vmov.16 q5[1], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[10]
|
||||
; CHECK-NEXT: vmov.16 q5[2], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[11]
|
||||
; CHECK-NEXT: vmov.16 q5[3], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[12]
|
||||
; CHECK-NEXT: vmov.16 q5[4], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[13]
|
||||
; CHECK-NEXT: vmov.16 q5[5], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[14]
|
||||
; CHECK-NEXT: vmov.16 q5[6], r2
|
||||
; CHECK-NEXT: vmov.u8 r2, q4[15]
|
||||
; CHECK-NEXT: vmov.16 q5[7], r2
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: vpsel q2, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[2]
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[0]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r6, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[3]
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[1]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r6, r2
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: and r5, r2, #1
|
||||
; CHECK-NEXT: ubfx r6, r2, #4, #1
|
||||
; CHECK-NEXT: vmov r4, s22
|
||||
; CHECK-NEXT: vmov r5, s23
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, r2, lr
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[6]
|
||||
; CHECK-NEXT: adc.w r12, r2, r5
|
||||
; CHECK-NEXT: ubfx r5, r6, #12, #1
|
||||
; CHECK-NEXT: ubfx r6, r6, #8, #1
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r5, r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q1[9]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[9]
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[6]
|
||||
; CHECK-NEXT: vmov q5[2], q5[0], r6, r5
|
||||
; CHECK-NEXT: smull r2, r4, r2, r4
|
||||
; CHECK-NEXT: vmov q5[3], q5[1], r6, r5
|
||||
; CHECK-NEXT: vmov.s8 r6, q1[7]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[7]
|
||||
; CHECK-NEXT: smull r6, r5, r5, r6
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r3, r6
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r4, r5
|
||||
; CHECK-NEXT: vand q3, q4, q3
|
||||
; CHECK-NEXT: vmov r4, s12
|
||||
; CHECK-NEXT: vmov r3, s13
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: adds.w r6, r12, r4
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-NEXT: adds r6, r6, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[10]
|
||||
; CHECK-NEXT: adc.w r12, r3, r5
|
||||
; CHECK-NEXT: ubfx r5, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: vmov q6[2], q6[0], r2, r6
|
||||
; CHECK-NEXT: vmov q6[3], q6[1], r4, r5
|
||||
; CHECK-NEXT: vand q5, q6, q5
|
||||
; CHECK-NEXT: vmov r6, s20
|
||||
; CHECK-NEXT: vmov r2, s21
|
||||
; CHECK-NEXT: vmov r5, s23
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: adc.w r6, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s22
|
||||
; CHECK-NEXT: adds.w r12, r3, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[8]
|
||||
; CHECK-NEXT: adc.w lr, r6, r5
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[8]
|
||||
; CHECK-NEXT: vmov.16 q5[0], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[9]
|
||||
; CHECK-NEXT: vmov.16 q5[1], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[10]
|
||||
; CHECK-NEXT: vmov.16 q5[2], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[11]
|
||||
; CHECK-NEXT: vmov.16 q5[3], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[12]
|
||||
; CHECK-NEXT: vmov.16 q5[4], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[13]
|
||||
; CHECK-NEXT: vmov.16 q5[5], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[14]
|
||||
; CHECK-NEXT: vmov.16 q5[6], r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q4[15]
|
||||
; CHECK-NEXT: vmov.16 q5[7], r6
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: vcmp.i16 ne, q5, zr
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vpsel q2, q3, q2
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[2]
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[0]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[3]
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[1]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r5, r6
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r6, p0
|
||||
; CHECK-NEXT: and r4, r6, #1
|
||||
; CHECK-NEXT: ubfx r5, r6, #4, #1
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[10]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[11]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[11]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: smull r2, r5, r5, r2
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r3, r2
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r4, r5
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[12]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r4, r5
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r4, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[9]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[9]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r2, r5
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r3, r4
|
||||
; CHECK-NEXT: vand q3, q4, q3
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov r3, s12
|
||||
; CHECK-NEXT: vmov r2, s13
|
||||
; CHECK-NEXT: vmov r5, s14
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: vmov r6, s15
|
||||
; CHECK-NEXT: adc.w r2, r2, r12
|
||||
; CHECK-NEXT: adds.w r12, r3, r5
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[12]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: adc.w lr, r2, r6
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[6]
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[4]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r6, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q2[7]
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[5]
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r6, r2
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r2, p0
|
||||
; CHECK-NEXT: and r5, r2, #1
|
||||
; CHECK-NEXT: ubfx r6, r2, #4, #1
|
||||
; CHECK-NEXT: vmov r4, s14
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, r2, lr
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[10]
|
||||
; CHECK-NEXT: adc.w r12, r2, r5
|
||||
; CHECK-NEXT: ubfx r5, r6, #12, #1
|
||||
; CHECK-NEXT: ubfx r6, r6, #8, #1
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r5, r6
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r5, r6
|
||||
; CHECK-NEXT: vmov.u8 r6, q1[13]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[13]
|
||||
; CHECK-NEXT: sxtb r6, r6
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[10]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r6, r5
|
||||
; CHECK-NEXT: smull r2, r4, r2, r4
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r6, r5
|
||||
; CHECK-NEXT: vmov.s8 r6, q1[11]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[11]
|
||||
; CHECK-NEXT: smull r6, r5, r5, r6
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r3, r6
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r4, r5
|
||||
; CHECK-NEXT: vand q2, q3, q2
|
||||
; CHECK-NEXT: vmov r4, s8
|
||||
; CHECK-NEXT: vmov r3, s9
|
||||
; CHECK-NEXT: vmov r5, s11
|
||||
; CHECK-NEXT: adds.w r6, r12, r4
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: adc.w r3, r3, lr
|
||||
; CHECK-NEXT: adds r6, r6, r4
|
||||
; CHECK-NEXT: vmov.u8 r4, q1[14]
|
||||
; CHECK-NEXT: adc.w r12, r3, r5
|
||||
; CHECK-NEXT: ubfx r5, r2, #12, #1
|
||||
; CHECK-NEXT: ubfx r2, r2, #8, #1
|
||||
; CHECK-NEXT: vmov q4[2], q4[0], r2, r6
|
||||
; CHECK-NEXT: vmov q4[3], q4[1], r4, r5
|
||||
; CHECK-NEXT: vand q3, q4, q3
|
||||
; CHECK-NEXT: vmov r6, s12
|
||||
; CHECK-NEXT: vmov r2, s13
|
||||
; CHECK-NEXT: vmov r5, s15
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
; CHECK-NEXT: adc.w r6, r12, r2
|
||||
; CHECK-NEXT: vmov r2, s14
|
||||
; CHECK-NEXT: adds.w r12, r3, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q1[12]
|
||||
; CHECK-NEXT: adc.w lr, r6, r5
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[6]
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[4]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r5, r6
|
||||
; CHECK-NEXT: vmov.u16 r6, q2[7]
|
||||
; CHECK-NEXT: vmov.u16 r5, q2[5]
|
||||
; CHECK-NEXT: smull r2, r3, r3, r2
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r5, r6
|
||||
; CHECK-NEXT: vcmp.i32 ne, q3, zr
|
||||
; CHECK-NEXT: vmrs r6, p0
|
||||
; CHECK-NEXT: and r4, r6, #1
|
||||
; CHECK-NEXT: ubfx r5, r6, #4, #1
|
||||
; CHECK-NEXT: rsbs r4, r4, #0
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: vmov.u8 r3, q0[14]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r2, r5
|
||||
; CHECK-NEXT: sxtb r4, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r2, r5
|
||||
; CHECK-NEXT: vmov.u8 r2, q1[15]
|
||||
; CHECK-NEXT: vmov.u8 r5, q0[15]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r5, r5
|
||||
; CHECK-NEXT: smull r2, r5, r5, r2
|
||||
; CHECK-NEXT: smull r3, r4, r3, r4
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r4, r5
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r4, r5
|
||||
; CHECK-NEXT: vmov.s8 r5, q1[13]
|
||||
; CHECK-NEXT: vmov.s8 r4, q0[13]
|
||||
; CHECK-NEXT: smull r5, r4, r4, r5
|
||||
; CHECK-NEXT: vmov q3[2], q3[0], r2, r5
|
||||
; CHECK-NEXT: vmov q3[3], q3[1], r3, r4
|
||||
; CHECK-NEXT: vand q2, q3, q2
|
||||
; CHECK-NEXT: vmov r3, s8
|
||||
; CHECK-NEXT: vmov r2, s9
|
||||
; CHECK-NEXT: vmov r4, s10
|
||||
; CHECK-NEXT: vmov r5, s11
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, r2, lr
|
||||
; CHECK-NEXT: adds r3, r3, r4
|
||||
; CHECK-NEXT: vmov.s8 r4, q1[14]
|
||||
; CHECK-NEXT: adc.w r12, r2, r5
|
||||
; CHECK-NEXT: ubfx r5, r6, #12, #1
|
||||
; CHECK-NEXT: ubfx r6, r6, #8, #1
|
||||
; CHECK-NEXT: rsbs r5, r5, #0
|
||||
; CHECK-NEXT: rsbs r6, r6, #0
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[14]
|
||||
; CHECK-NEXT: vmov q2[2], q2[0], r6, r5
|
||||
; CHECK-NEXT: smull r2, r4, r2, r4
|
||||
; CHECK-NEXT: vmov q2[3], q2[1], r6, r5
|
||||
; CHECK-NEXT: vmov.s8 r6, q1[15]
|
||||
; CHECK-NEXT: vmov.s8 r5, q0[15]
|
||||
; CHECK-NEXT: smull r6, r5, r5, r6
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r2, r6
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
|
||||
; CHECK-NEXT: vand q0, q0, q2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vmov r2, s1
|
||||
; CHECK-NEXT: vmov r5, s2
|
||||
; CHECK-NEXT: adds r3, r3, r6
|
||||
|
Loading…
Reference in New Issue
Block a user