mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM] MVE big endian loads/stores
This adds some missing patterns for big endian loads/stores, allowing unaligned loads/stores to also be selected with an extra VREV, which produces better code than aligning through a stack. Also moves VLDR_P0 to not be LE only, and adjusts some of the tests to show all that working. Differential Revision: https://reviews.llvm.org/D65583 llvm-svn: 368304
This commit is contained in:
parent
db4202796c
commit
3265a2671b
@ -14075,45 +14075,21 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
|
||||
Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
|
||||
Ty != MVT::v2f64)
|
||||
return false;
|
||||
|
||||
if (Subtarget->isLittle()) {
|
||||
// In little-endian MVE, the store instructions VSTRB.U8,
|
||||
// VSTRH.U16 and VSTRW.U32 all store the vector register in
|
||||
// exactly the same format, and differ only in the range of
|
||||
// their immediate offset field and the required alignment.
|
||||
//
|
||||
// In particular, VSTRB.U8 can store a vector at byte alignment.
|
||||
// So at this stage we can simply say that loads/stores of all
|
||||
// 128-bit wide vector types are permitted at any alignment,
|
||||
// because we know at least _one_ instruction can manage that.
|
||||
//
|
||||
// Later on we might find that some of those loads are better
|
||||
// generated as VLDRW.U32 if alignment permits, to take
|
||||
// advantage of the larger immediate range. But for the moment,
|
||||
// all that matters is that if we don't lower the load then
|
||||
// _some_ instruction can handle it.
|
||||
// In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
|
||||
// VSTRW.U32 all store the vector register in exactly the same format, and
|
||||
// differ only in the range of their immediate offset field and the required
|
||||
// alignment. So there is always a store that can be used, regardless of
|
||||
// actual type.
|
||||
//
|
||||
// For big endian, that is not the case. But we can still emit a (VSTRB.U8;
|
||||
// VREV64.8) pair and get the same effect. This will likely be better than
|
||||
// aligning the vector through the stack.
|
||||
if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
|
||||
Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
|
||||
Ty == MVT::v2f64) {
|
||||
if (Fast)
|
||||
*Fast = true;
|
||||
return true;
|
||||
} else {
|
||||
// In big-endian MVE, those instructions aren't so similar
|
||||
// after all, because they reorder the bytes of the vector
|
||||
// differently. So this time we can only store a particular
|
||||
// kind of vector if its alignment is at least the element
|
||||
// type. And we can't store vectors of i64 or f64 at all
|
||||
// without having to do some postprocessing, because there's
|
||||
// no VSTRD.U64.
|
||||
if (Ty == MVT::v16i8 ||
|
||||
((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
|
||||
((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
|
||||
if (Fast)
|
||||
*Fast = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -4820,13 +4820,6 @@ let Predicates = [HasMVEInt, IsLE] in {
|
||||
defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
|
||||
defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
|
||||
defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
|
||||
|
||||
def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt, IsBE] in {
|
||||
@ -4841,6 +4834,41 @@ let Predicates = [HasMVEInt, IsBE] in {
|
||||
def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
|
||||
def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
|
||||
def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
|
||||
|
||||
// Other unaligned loads/stores need to go through a VREV
|
||||
def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
|
||||
(v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
|
||||
def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
|
||||
(v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
|
||||
}
|
||||
|
||||
|
||||
|
@ -29,47 +29,14 @@ define void @load_load_add_store_align1(<4 x i32> *%src1, <4 x i32> *%src2) {
|
||||
;
|
||||
; CHECK-BE-LABEL: load_load_add_store_align1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: .setfp r7, sp, #8
|
||||
; CHECK-BE-NEXT: add r7, sp, #8
|
||||
; CHECK-BE-NEXT: .pad #48
|
||||
; CHECK-BE-NEXT: sub sp, #48
|
||||
; CHECK-BE-NEXT: mov r4, sp
|
||||
; CHECK-BE-NEXT: bfc r4, #0, #4
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: ldr.w r12, [r1]
|
||||
; CHECK-BE-NEXT: ldr r3, [r1, #4]
|
||||
; CHECK-BE-NEXT: ldr r2, [r1, #8]
|
||||
; CHECK-BE-NEXT: ldr r1, [r1, #12]
|
||||
; CHECK-BE-NEXT: strd r2, r1, [sp, #24]
|
||||
; CHECK-BE-NEXT: mov r1, r0
|
||||
; CHECK-BE-NEXT: strd r12, r3, [sp, #16]
|
||||
; CHECK-BE-NEXT: ldr r2, [r1, #4]!
|
||||
; CHECK-BE-NEXT: str r2, [sp, #4]
|
||||
; CHECK-BE-NEXT: ldr r2, [r0]
|
||||
; CHECK-BE-NEXT: str r2, [sp]
|
||||
; CHECK-BE-NEXT: mov r2, r1
|
||||
; CHECK-BE-NEXT: ldr r3, [r2, #4]!
|
||||
; CHECK-BE-NEXT: str r3, [sp, #8]
|
||||
; CHECK-BE-NEXT: ldr r3, [r2, #4]
|
||||
; CHECK-BE-NEXT: str r3, [sp, #12]
|
||||
; CHECK-BE-NEXT: add r3, sp, #16
|
||||
; CHECK-BE-NEXT: vldrw.u32 q0, [r3]
|
||||
; CHECK-BE-NEXT: mov r3, sp
|
||||
; CHECK-BE-NEXT: vldrw.u32 q1, [r3]
|
||||
; CHECK-BE-NEXT: add r3, sp, #32
|
||||
; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vrev32.8 q1, q1
|
||||
; CHECK-BE-NEXT: vadd.i32 q0, q1, q0
|
||||
; CHECK-BE-NEXT: vstrw.32 q0, [r3]
|
||||
; CHECK-BE-NEXT: ldrd r3, r4, [sp, #40]
|
||||
; CHECK-BE-NEXT: ldrd r12, lr, [sp, #32]
|
||||
; CHECK-BE-NEXT: str r4, [r2, #4]
|
||||
; CHECK-BE-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-BE-NEXT: str r3, [r2]
|
||||
; CHECK-BE-NEXT: str.w lr, [r1]
|
||||
; CHECK-BE-NEXT: str.w r12, [r0]
|
||||
; CHECK-BE-NEXT: mov sp, r4
|
||||
; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%l1 = load <4 x i32>, <4 x i32>* %src1, align 1
|
||||
%l2 = load <4 x i32>, <4 x i32>* %src2, align 1
|
||||
|
@ -1,72 +1,138 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(<4 x i32>* %vp) {
|
||||
; CHECK-LABEL: load_4xi32_a4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: load_4xi32_a4:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_4xi32_a4:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load <4 x i32>, <4 x i32>* %vp, align 4
|
||||
ret <4 x i32> %0
|
||||
%1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(<4 x i32>* %vp) {
|
||||
; CHECK-LABEL: load_4xi32_a2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrh.u16 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: load_4xi32_a2:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldrh.u16 q0, [r0]
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_4xi32_a2:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load <4 x i32>, <4 x i32>* %vp, align 2
|
||||
ret <4 x i32> %0
|
||||
%1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(<4 x i32>* %vp) {
|
||||
; CHECK-LABEL: load_4xi32_a1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: load_4xi32_a1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vldrb.u8 q0, [r0]
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_4xi32_a1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load <4 x i32>, <4 x i32>* %vp, align 1
|
||||
ret <4 x i32> %0
|
||||
%1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc void @store_4xi32_a4(<4 x i32>* %vp, <4 x i32> %val) {
|
||||
; CHECK-LABEL: store_4xi32_a4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: store_4xi32_a4:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_4xi32_a4:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
|
||||
; CHECK-BE-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
store <4 x i32> %val, <4 x i32>* %vp, align 4
|
||||
%0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
|
||||
store <4 x i32> %0, <4 x i32>* %vp, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc void @store_4xi32_a2(<4 x i32>* %vp, <4 x i32> %val) {
|
||||
; CHECK-LABEL: store_4xi32_a2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vstrh.16 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: store_4xi32_a2:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: vstrh.16 q0, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_4xi32_a2:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
store <4 x i32> %val, <4 x i32>* %vp, align 2
|
||||
%0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
|
||||
store <4 x i32> %0, <4 x i32>* %vp, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc void @store_4xi32_a1(<4 x i32>* %vp, <4 x i32> %val) {
|
||||
; CHECK-LABEL: store_4xi32_a1:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: store_4xi32_a1:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
|
||||
; CHECK-LE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: store_4xi32_a1:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
|
||||
; CHECK-BE-NEXT: vrev32.8 q0, q0
|
||||
; CHECK-BE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
store <4 x i32> %val, <4 x i32>* %vp, align 1
|
||||
%0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
|
||||
store <4 x i32> %0, <4 x i32>* %vp, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(i32* %ip) {
|
||||
; CHECK-LABEL: load_4xi32_a4_offset_pos:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: add.w r0, r0, #508
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: load_4xi32_a4_offset_pos:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: add.w r0, r0, #508
|
||||
; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_4xi32_a4_offset_pos:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: add.w r0, r0, #508
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%ipoffset = getelementptr inbounds i32, i32* %ip, i32 127
|
||||
%vp = bitcast i32* %ipoffset to <4 x i32>*
|
||||
@ -75,11 +141,18 @@ entry:
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(i32* %ip) {
|
||||
; CHECK-LABEL: load_4xi32_a4_offset_neg:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: sub.w r0, r0, #508
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: load_4xi32_a4_offset_neg:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: sub.w r0, r0, #508
|
||||
; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: load_4xi32_a4_offset_neg:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: sub.w r0, r0, #508
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%ipoffset = getelementptr inbounds i32, i32* %ip, i32 -127
|
||||
%vp = bitcast i32* %ipoffset to <4 x i32>*
|
||||
@ -88,19 +161,34 @@ entry:
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() {
|
||||
; CHECK-LABEL: loadstore_4xi32_stack_off16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #40
|
||||
; CHECK-NEXT: sub sp, #40
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x1
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-NEXT: movs r0, #3
|
||||
; CHECK-NEXT: vstrw.32 q0, [sp, #16]
|
||||
; CHECK-NEXT: str r0, [sp, #16]
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp, #16]
|
||||
; CHECK-NEXT: add sp, #40
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: loadstore_4xi32_stack_off16:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #40
|
||||
; CHECK-LE-NEXT: sub sp, #40
|
||||
; CHECK-LE-NEXT: vmov.i32 q0, #0x1
|
||||
; CHECK-LE-NEXT: mov r0, sp
|
||||
; CHECK-LE-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-LE-NEXT: movs r0, #3
|
||||
; CHECK-LE-NEXT: vstrw.32 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: str r0, [sp, #16]
|
||||
; CHECK-LE-NEXT: vldrw.u32 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: add sp, #40
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: loadstore_4xi32_stack_off16:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #40
|
||||
; CHECK-BE-NEXT: sub sp, #40
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x1
|
||||
; CHECK-BE-NEXT: mov r0, sp
|
||||
; CHECK-BE-NEXT: vstrw.32 q0, [r0]
|
||||
; CHECK-BE-NEXT: movs r0, #3
|
||||
; CHECK-BE-NEXT: vstrw.32 q0, [sp, #16]
|
||||
; CHECK-BE-NEXT: str r0, [sp, #16]
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #40
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = alloca [1 x [5 x [2 x i32]]], align 4
|
||||
%0 = bitcast [1 x [5 x [2 x i32]]]* %c to i8*
|
||||
@ -116,19 +204,34 @@ entry:
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() {
|
||||
; CHECK-LABEL: loadstore_8xi16_stack_off16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #40
|
||||
; CHECK-NEXT: sub sp, #40
|
||||
; CHECK-NEXT: vmov.i16 q0, #0x1
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrh.16 q0, [r0]
|
||||
; CHECK-NEXT: movs r0, #3
|
||||
; CHECK-NEXT: vstrh.16 q0, [sp, #16]
|
||||
; CHECK-NEXT: strh.w r0, [sp, #16]
|
||||
; CHECK-NEXT: vldrh.u16 q0, [sp, #16]
|
||||
; CHECK-NEXT: add sp, #40
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: loadstore_8xi16_stack_off16:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #40
|
||||
; CHECK-LE-NEXT: sub sp, #40
|
||||
; CHECK-LE-NEXT: vmov.i16 q0, #0x1
|
||||
; CHECK-LE-NEXT: mov r0, sp
|
||||
; CHECK-LE-NEXT: vstrh.16 q0, [r0]
|
||||
; CHECK-LE-NEXT: movs r0, #3
|
||||
; CHECK-LE-NEXT: vstrh.16 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: strh.w r0, [sp, #16]
|
||||
; CHECK-LE-NEXT: vldrh.u16 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: add sp, #40
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: loadstore_8xi16_stack_off16:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #40
|
||||
; CHECK-BE-NEXT: sub sp, #40
|
||||
; CHECK-BE-NEXT: vmov.i16 q0, #0x1
|
||||
; CHECK-BE-NEXT: mov r0, sp
|
||||
; CHECK-BE-NEXT: vstrh.16 q0, [r0]
|
||||
; CHECK-BE-NEXT: movs r0, #3
|
||||
; CHECK-BE-NEXT: vstrh.16 q0, [sp, #16]
|
||||
; CHECK-BE-NEXT: strh.w r0, [sp, #16]
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #40
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = alloca [1 x [10 x [2 x i16]]], align 2
|
||||
%0 = bitcast [1 x [10 x [2 x i16]]]* %c to i8*
|
||||
@ -144,19 +247,34 @@ entry:
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() {
|
||||
; CHECK-LABEL: loadstore_16xi8_stack_off16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #40
|
||||
; CHECK-NEXT: sub sp, #40
|
||||
; CHECK-NEXT: vmov.i8 q0, #0x1
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-NEXT: movs r0, #3
|
||||
; CHECK-NEXT: vstrb.8 q0, [sp, #16]
|
||||
; CHECK-NEXT: strb.w r0, [sp, #16]
|
||||
; CHECK-NEXT: vldrb.u8 q0, [sp, #16]
|
||||
; CHECK-NEXT: add sp, #40
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-LE-LABEL: loadstore_16xi8_stack_off16:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .pad #40
|
||||
; CHECK-LE-NEXT: sub sp, #40
|
||||
; CHECK-LE-NEXT: vmov.i8 q0, #0x1
|
||||
; CHECK-LE-NEXT: mov r0, sp
|
||||
; CHECK-LE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-LE-NEXT: movs r0, #3
|
||||
; CHECK-LE-NEXT: vstrb.8 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: strb.w r0, [sp, #16]
|
||||
; CHECK-LE-NEXT: vldrb.u8 q0, [sp, #16]
|
||||
; CHECK-LE-NEXT: add sp, #40
|
||||
; CHECK-LE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-BE-LABEL: loadstore_16xi8_stack_off16:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .pad #40
|
||||
; CHECK-BE-NEXT: sub sp, #40
|
||||
; CHECK-BE-NEXT: vmov.i8 q0, #0x1
|
||||
; CHECK-BE-NEXT: mov r0, sp
|
||||
; CHECK-BE-NEXT: vstrb.8 q0, [r0]
|
||||
; CHECK-BE-NEXT: movs r0, #3
|
||||
; CHECK-BE-NEXT: vstrb.8 q0, [sp, #16]
|
||||
; CHECK-BE-NEXT: strb.w r0, [sp, #16]
|
||||
; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #40
|
||||
; CHECK-BE-NEXT: bx lr
|
||||
entry:
|
||||
%c = alloca [1 x [20 x [2 x i8]]], align 1
|
||||
%0 = bitcast [1 x [20 x [2 x i8]]]* %c to i8*
|
||||
|
@ -1,81 +1,165 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
|
||||
|
||||
declare arm_aapcs_vfpcc <4 x i32> @ext_i32()
|
||||
declare arm_aapcs_vfpcc <8 x i16> @ext_i16()
|
||||
declare arm_aapcs_vfpcc <16 x i8> @ext_i8()
|
||||
declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
|
||||
declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
|
||||
declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
|
||||
; CHECK-LABEL: shuffle1_v4i32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: bl ext_i32
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-LE-LABEL: shuffle1_v4i32:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-LE-NEXT: vpush {d8, d9}
|
||||
; CHECK-LE-NEXT: .pad #8
|
||||
; CHECK-LE-NEXT: sub sp, #8
|
||||
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-LE-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-LE-NEXT: vmov q4, q1
|
||||
; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-LE-NEXT: bl ext_i32
|
||||
; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-LE-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-LE-NEXT: add sp, #8
|
||||
; CHECK-LE-NEXT: vpop {d8, d9}
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: shuffle1_v4i32:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-BE-NEXT: vpush {d8, d9}
|
||||
; CHECK-BE-NEXT: .pad #8
|
||||
; CHECK-BE-NEXT: sub sp, #8
|
||||
; CHECK-BE-NEXT: vrev64.32 q4, q1
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
|
||||
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q0
|
||||
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: bl ext_i32
|
||||
; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-BE-NEXT: vrev64.32 q1, q0
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q1
|
||||
; CHECK-BE-NEXT: vrev64.32 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #8
|
||||
; CHECK-BE-NEXT: vpop {d8, d9}
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%c = icmp eq <4 x i32> %src, zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32()
|
||||
%s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
|
||||
%s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
|
||||
ret <4 x i32> %s
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
|
||||
; CHECK-LABEL: shuffle1_v8i16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.i16 eq, q0, zr
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: bl ext_i16
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-LE-LABEL: shuffle1_v8i16:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-LE-NEXT: vpush {d8, d9}
|
||||
; CHECK-LE-NEXT: .pad #8
|
||||
; CHECK-LE-NEXT: sub sp, #8
|
||||
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-LE-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-LE-NEXT: vmov q4, q1
|
||||
; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-LE-NEXT: bl ext_i16
|
||||
; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-LE-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-LE-NEXT: add sp, #8
|
||||
; CHECK-LE-NEXT: vpop {d8, d9}
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: shuffle1_v8i16:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-BE-NEXT: vpush {d8, d9}
|
||||
; CHECK-BE-NEXT: .pad #8
|
||||
; CHECK-BE-NEXT: sub sp, #8
|
||||
; CHECK-BE-NEXT: vrev64.16 q4, q1
|
||||
; CHECK-BE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-BE-NEXT: vrev64.16 q2, q0
|
||||
; CHECK-BE-NEXT: vrev32.16 q1, q1
|
||||
; CHECK-BE-NEXT: vcmp.i16 eq, q2, zr
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q1
|
||||
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-BE-NEXT: vrev64.16 q0, q1
|
||||
; CHECK-BE-NEXT: bl ext_i16
|
||||
; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-BE-NEXT: vrev64.16 q1, q0
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q1
|
||||
; CHECK-BE-NEXT: vrev64.16 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #8
|
||||
; CHECK-BE-NEXT: vpop {d8, d9}
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%c = icmp eq <8 x i16> %src, zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16()
|
||||
%s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
|
||||
%s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
|
||||
ret <8 x i16> %s
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
|
||||
; CHECK-LABEL: shuffle1_v16i8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .vsave {d8, d9}
|
||||
; CHECK-NEXT: vpush {d8, d9}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.i8 eq, q0, zr
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: bl ext_i8
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: vpop {d8, d9}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-LE-LABEL: shuffle1_v16i8:
|
||||
; CHECK-LE: @ %bb.0: @ %entry
|
||||
; CHECK-LE-NEXT: .save {r7, lr}
|
||||
; CHECK-LE-NEXT: push {r7, lr}
|
||||
; CHECK-LE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-LE-NEXT: vpush {d8, d9}
|
||||
; CHECK-LE-NEXT: .pad #8
|
||||
; CHECK-LE-NEXT: sub sp, #8
|
||||
; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
|
||||
; CHECK-LE-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-LE-NEXT: vpsel q0, q1, q0
|
||||
; CHECK-LE-NEXT: vmov q4, q1
|
||||
; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-LE-NEXT: bl ext_i8
|
||||
; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-LE-NEXT: vpsel q0, q4, q0
|
||||
; CHECK-LE-NEXT: add sp, #8
|
||||
; CHECK-LE-NEXT: vpop {d8, d9}
|
||||
; CHECK-LE-NEXT: pop {r7, pc}
|
||||
;
|
||||
; CHECK-BE-LABEL: shuffle1_v16i8:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r7, lr}
|
||||
; CHECK-BE-NEXT: push {r7, lr}
|
||||
; CHECK-BE-NEXT: .vsave {d8, d9}
|
||||
; CHECK-BE-NEXT: vpush {d8, d9}
|
||||
; CHECK-BE-NEXT: .pad #8
|
||||
; CHECK-BE-NEXT: sub sp, #8
|
||||
; CHECK-BE-NEXT: vrev64.8 q4, q1
|
||||
; CHECK-BE-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-BE-NEXT: vrev64.8 q2, q0
|
||||
; CHECK-BE-NEXT: vrev32.8 q1, q1
|
||||
; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q1
|
||||
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: bl ext_i8
|
||||
; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-BE-NEXT: vrev64.8 q1, q0
|
||||
; CHECK-BE-NEXT: vpsel q1, q4, q1
|
||||
; CHECK-BE-NEXT: vrev64.8 q0, q1
|
||||
; CHECK-BE-NEXT: add sp, #8
|
||||
; CHECK-BE-NEXT: vpop {d8, d9}
|
||||
; CHECK-BE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%c = icmp eq <16 x i8> %src, zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8()
|
||||
%s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
|
||||
%ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
|
||||
%s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext
|
||||
ret <16 x i8> %s
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
|
||||
|
||||
define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
|
||||
; CHECK-LABEL: foo_int8_int32:
|
||||
|
Loading…
x
Reference in New Issue
Block a user