mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM] Expand VMOVRRD simplification pattern
This expands the VMOVRRD(extract(..(build_vector(a, b, c, d)))) pattern to also handle chains of insert_vector_elt nodes. Provided we can find the correct inserts, this helps further simplify patterns by removing the redundant VMOVRRD. Differential Revision: https://reviews.llvm.org/D100245
This commit is contained in:
parent
bd921d06d1
commit
746a7315fd
@ -13695,22 +13695,51 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
|
||||
}
|
||||
|
||||
// VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
|
||||
// VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
|
||||
if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
isa<ConstantSDNode>(InDouble.getOperand(1))) {
|
||||
SDValue BV = InDouble.getOperand(0);
|
||||
// Look up through any nop bitcasts
|
||||
while (BV.getOpcode() == ISD::BITCAST &&
|
||||
(BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64))
|
||||
// Look up through any nop bitcasts and vector_reg_casts. bitcasts may
|
||||
// change lane order under big endian.
|
||||
bool BVSwap = BV.getOpcode() == ISD::BITCAST;
|
||||
while (
|
||||
(BV.getOpcode() == ISD::BITCAST ||
|
||||
BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
|
||||
(BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
|
||||
BVSwap = BV.getOpcode() == ISD::BITCAST;
|
||||
BV = BV.getOperand(0);
|
||||
if (BV.getValueType() != MVT::v4i32 || BV.getOpcode() != ISD::BUILD_VECTOR)
|
||||
}
|
||||
if (BV.getValueType() != MVT::v4i32)
|
||||
return SDValue();
|
||||
|
||||
// Handle buildvectors, pulling out the correct lane depending on
|
||||
// endianness.
|
||||
unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
|
||||
if (Subtarget->isLittle())
|
||||
return DCI.DAG.getMergeValues(
|
||||
{BV.getOperand(Offset), BV.getOperand(Offset + 1)}, SDLoc(N));
|
||||
else
|
||||
return DCI.DAG.getMergeValues(
|
||||
{BV.getOperand(Offset + 1), BV.getOperand(Offset)}, SDLoc(N));
|
||||
if (BV.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
SDValue Op0 = BV.getOperand(Offset);
|
||||
SDValue Op1 = BV.getOperand(Offset + 1);
|
||||
if (!Subtarget->isLittle() && BVSwap)
|
||||
std::swap(Op0, Op1);
|
||||
|
||||
return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
|
||||
}
|
||||
|
||||
// A chain of insert_vectors, grabbing the correct value of the chain of
|
||||
// inserts.
|
||||
SDValue Op0, Op1;
|
||||
while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
|
||||
if (isa<ConstantSDNode>(BV.getOperand(2))) {
|
||||
if (BV.getConstantOperandVal(2) == Offset)
|
||||
Op0 = BV.getOperand(1);
|
||||
if (BV.getConstantOperandVal(2) == Offset + 1)
|
||||
Op1 = BV.getOperand(1);
|
||||
}
|
||||
BV = BV.getOperand(0);
|
||||
}
|
||||
if (!Subtarget->isLittle() && BVSwap)
|
||||
std::swap(Op0, Op1);
|
||||
if (Op0 && Op1)
|
||||
return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -44,9 +44,14 @@ define <4 x i32> @h(<4 x i8> *%in) {
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
|
||||
; CHECK-NEXT: vmovl.u8 q8, d16
|
||||
; CHECK-NEXT: vmovl.u16 q8, d16
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[0]
|
||||
; CHECK-NEXT: vmov.u16 r1, d16[1]
|
||||
; CHECK-NEXT: vmov.u16 r2, d16[2]
|
||||
; CHECK-NEXT: vmov.u16 r3, d16[3]
|
||||
; CHECK-NEXT: uxtb r0, r0
|
||||
; CHECK-NEXT: uxtb r1, r1
|
||||
; CHECK-NEXT: uxtb r2, r2
|
||||
; CHECK-NEXT: uxtb r3, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
%1 = load <4 x i8>, <4 x i8>* %in, align 4
|
||||
%2 = extractelement <4 x i8> %1, i32 0
|
||||
|
@ -100,9 +100,9 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
|
||||
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
|
||||
; CHECK-LABEL: v_dupQ32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vdup.32 q8, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: mov r2, r0
|
||||
; CHECK-NEXT: mov r3, r0
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
|
||||
%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
|
||||
@ -379,10 +379,9 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind {
|
||||
define <4 x i32> @tdupi(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: tdupi:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vdup.32 q8, r0
|
||||
; CHECK-NEXT: vmov.32 d17[1], r1
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: mov r2, r0
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%1 = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%2 = insertelement <4 x i32> %1, i32 %x, i32 1
|
||||
@ -412,11 +411,10 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
|
||||
; CHECK-LABEL: tduplane:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vmov d16, r0, r1
|
||||
; CHECK-NEXT: mov r0, #255
|
||||
; CHECK-NEXT: vdup.32 q8, d16[1]
|
||||
; CHECK-NEXT: vmov.32 d17[1], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov r3, #255
|
||||
; CHECK-NEXT: vmov.32 r0, d16[1]
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: mov r2, r0
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%in = extractelement <4 x i32> %invec, i32 1
|
||||
%1 = insertelement <4 x i32> undef, i32 %in, i32 0
|
||||
|
@ -524,19 +524,16 @@ entry:
|
||||
define <4 x i32> @insertextract(i32 %x, i32 %y) {
|
||||
; CHECK-LE-LABEL: insertextract:
|
||||
; CHECK-LE: @ %bb.0:
|
||||
; CHECK-LE-NEXT: vdup.32 q0, r0
|
||||
; CHECK-LE-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-LE-NEXT: vmov r0, r1, d0
|
||||
; CHECK-LE-NEXT: vmov r2, r3, d1
|
||||
; CHECK-LE-NEXT: mov r3, r1
|
||||
; CHECK-LE-NEXT: mov r1, r0
|
||||
; CHECK-LE-NEXT: mov r2, r0
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: insertextract:
|
||||
; CHECK-BE: @ %bb.0:
|
||||
; CHECK-BE-NEXT: vdup.32 q0, r0
|
||||
; CHECK-BE-NEXT: vmov.32 q0[3], r1
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vmov r1, r0, d2
|
||||
; CHECK-BE-NEXT: vmov r3, r2, d3
|
||||
; CHECK-BE-NEXT: mov r3, r1
|
||||
; CHECK-BE-NEXT: mov r1, r0
|
||||
; CHECK-BE-NEXT: mov r2, r0
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
%1 = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%2 = insertelement <4 x i32> %1, i32 %x, i32 1
|
||||
|
@ -52,16 +52,11 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x) {
|
||||
; CHECK-LABEL: add_v2i32_v2i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r1, s2
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r3, r1, asr #31
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i32> %x to <2 x i64>
|
||||
@ -174,51 +169,28 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
|
||||
; CHECK-LABEL: add_v8i16_v8i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.s16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r1, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.s16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[1]
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[2]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[3]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[4]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[5]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w r12, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[6]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[7]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r0, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov r0, r3, d0
|
||||
; CHECK-NEXT: adds.w r0, r0, r12
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
@ -269,18 +241,13 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x) {
|
||||
; CHECK-LABEL: add_v2i16_v2i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: sxth r1, r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: sxth r0, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r3, r1, asr #31
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i16> %x to <2 x i64>
|
||||
@ -525,99 +492,52 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
|
||||
; CHECK-LABEL: add_v16i8_v16i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r1, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: vmov.s8 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[1]
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[2]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[3]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[2]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[4]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[5]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[6]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[7]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[8]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[9]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[10]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[11]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[10]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[12]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[13]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w r12, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[14]
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[15]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[14]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r0, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov r0, r3, d0
|
||||
; CHECK-NEXT: adds.w r0, r0, r12
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
@ -675,59 +595,36 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
|
||||
; CHECK-LABEL: add_v8i8_v8i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r1, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r0, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: sxtb r1, r1
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: asrs r1, r1, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
|
||||
; CHECK-NEXT: vmov r0, r1, d3
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[5]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w r12, r0, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[6]
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[6]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r0, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
|
||||
; CHECK-NEXT: vmov r0, r3, d0
|
||||
; CHECK-NEXT: adds.w r0, r0, r12
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
@ -780,18 +677,13 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x) {
|
||||
; CHECK-LABEL: add_v2i8_v2i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: sxtb r1, r0
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: asrs r2, r1, #31
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: sxtb r0, r0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
|
||||
; CHECK-NEXT: asrs r0, r0, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
|
||||
; CHECK-NEXT: vmov r0, s2
|
||||
; CHECK-NEXT: vmov r2, r3, d0
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adc.w r1, r3, r1, asr #31
|
||||
; CHECK-NEXT: adc.w r1, r1, r2, asr #31
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i8> %x to <2 x i64>
|
||||
@ -871,21 +763,14 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, lr, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i32> %x to <2 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
|
||||
@ -1008,57 +893,34 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[0]
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[1]
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: adds.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[2]
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: adds.w r12, lr, r2
|
||||
; CHECK-NEXT: adc.w r2, r3, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[3]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s16 r2, q0[7]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[5]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r4, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r4
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adc.w r4, r4, r12
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r4, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w lr, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s16 r3, q0[7]
|
||||
; CHECK-NEXT: adds.w r2, r12, r3
|
||||
; CHECK-NEXT: adc.w r3, lr, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%xx = sext <8 x i16> %x to <8 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
|
||||
@ -1113,23 +975,16 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: sxth r2, r2
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, lr, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: sxth r3, r3
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i16> %x to <2 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
|
||||
@ -1390,105 +1245,58 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[0]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[3]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[0]
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[1]
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: adds.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[2]
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: adds.w r12, lr, r2
|
||||
; CHECK-NEXT: adc.w r2, r3, r2, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[3]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[4]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[7]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[5]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[6]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[9]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[7]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[8]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[11]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[9]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[10]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[13]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[11]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[12]
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.s8 r2, q0[15]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[13]
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[14]
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r4, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r4
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adc.w r4, r4, r12
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r4, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w lr, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.s8 r3, q0[15]
|
||||
; CHECK-NEXT: adds.w r2, r12, r3
|
||||
; CHECK-NEXT: adc.w r3, lr, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%xx = sext <16 x i8> %x to <16 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
|
||||
@ -1550,65 +1358,42 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v8i8_v8i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[1]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[0]
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[0]
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[1]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov lr, r12, d3
|
||||
; CHECK-NEXT: vmov r3, r2, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[2]
|
||||
; CHECK-NEXT: adc.w r12, r12, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[3]
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: adds.w lr, r2, r3
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[2]
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: adds.w r12, lr, r2
|
||||
; CHECK-NEXT: adc.w r2, r3, r2, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[3]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[5]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[4]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
|
||||
; CHECK-NEXT: vmov r2, r3, d2
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: vmov r2, r3, d3
|
||||
; CHECK-NEXT: adds.w lr, lr, r2
|
||||
; CHECK-NEXT: vmov.u16 r2, q0[7]
|
||||
; CHECK-NEXT: adc.w r12, r12, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[5]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w r2, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[6]
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r4, r2, #31
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r4
|
||||
; CHECK-NEXT: vmov r3, r4, d0
|
||||
; CHECK-NEXT: adds.w lr, lr, r3
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: adc.w r4, r4, r12
|
||||
; CHECK-NEXT: adds.w r3, r3, lr
|
||||
; CHECK-NEXT: adc.w r2, r4, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: adds.w r12, r12, r3
|
||||
; CHECK-NEXT: adc.w lr, r2, r3, asr #31
|
||||
; CHECK-NEXT: vmov.u16 r3, q0[7]
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: adds.w r2, r12, r3
|
||||
; CHECK-NEXT: adc.w r3, lr, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%xx = sext <8 x i8> %x to <8 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
|
||||
@ -1665,23 +1450,16 @@ entry:
|
||||
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) {
|
||||
; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: sxtb r2, r2
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: asr.w r12, r2, #31
|
||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||
; CHECK-NEXT: asrs r3, r3, #31
|
||||
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
|
||||
; CHECK-NEXT: vmov r12, s2
|
||||
; CHECK-NEXT: vmov r3, lr, d0
|
||||
; CHECK-NEXT: adds.w r3, r3, r12
|
||||
; CHECK-NEXT: adc.w r2, lr, r2, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r3
|
||||
; CHECK-NEXT: adcs r1, r2
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: sxtb r3, r3
|
||||
; CHECK-NEXT: adds r2, r2, r3
|
||||
; CHECK-NEXT: adc.w r3, r12, r3, asr #31
|
||||
; CHECK-NEXT: adds r0, r0, r2
|
||||
; CHECK-NEXT: adcs r1, r3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%xx = sext <2 x i8> %x to <2 x i64>
|
||||
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user