mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
c55d376d9d
The MVE_VLDRWU32_qi_pre gather loads, like the other _pre/_post mve loads returns the writeback as result 0, the value as result 1. The llvm ir intrinsic seems to have this the other way around though, and so when lowering from one to the other we need to switch the first two outputs. I've also fixed up the types of _pre/_post on normal MVE loads. There we were already getting the values the right way around, just not for the types. I don't believe this was causing anything to go wrong, but it was very confusing to read in the debug output. Differential Revision: https://reviews.llvm.org/D73370
2019 lines
81 KiB
LLVM
2019 lines
81 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.s16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.s32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
|
|
ret <16 x i8> %0
|
|
}
|
|
|
|
declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
|
|
ret <16 x i8> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.s16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
|
|
|
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.s32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
%2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
|
|
ret <16 x i8> %2
|
|
}
|
|
|
|
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
|
|
|
declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
%2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
|
|
ret <16 x i8> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [q0, #616]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [q0, #-336]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vldrd.u64 q0, [q1, #576]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
|
|
%2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
|
|
store <2 x i64> %2, <2 x i64>* %addr, align 8
|
|
%3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
|
|
ret <2 x i64> %3
|
|
}
|
|
|
|
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vldrd.u64 q0, [q1, #-328]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
|
|
%2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
|
|
store <2 x i64> %2, <2 x i64>* %addr, align 8
|
|
%3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
|
|
ret <2 x i64> %3
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q0, [q1, #664]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
|
|
%4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
|
|
store <2 x i64> %4, <2 x i64>* %addr, align 8
|
|
%5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
|
|
ret <2 x i64> %5
|
|
}
|
|
|
|
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2)
|
|
%4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
|
|
store <2 x i64> %4, <2 x i64>* %addr, align 8
|
|
%5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
|
|
ret <2 x i64> %5
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [q0, #888]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [q0, #-1000]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) {
|
|
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) {
|
|
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) {
|
|
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) {
|
|
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
|
|
ret <2 x i64> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
|
|
ret <8 x half> %0
|
|
}
|
|
|
|
declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.s32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
|
|
ret <8 x half> %2
|
|
}
|
|
|
|
declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.s32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
|
|
ret <8 x half> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.s32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
|
|
ret <8 x i16> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrh.u32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
|
|
ret <8 x half> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.s32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
|
|
ret <8 x i16> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrht.u32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [q0, #12]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
|
|
ret <4 x float> %0
|
|
}
|
|
|
|
declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [q0, #400]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [q0, #284]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vldrw.u32 q0, [q1, #-64]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
|
|
%2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
|
|
store <4 x i32> %2, <4 x i32>* %addr, align 8
|
|
%3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
|
|
ret <4 x float> %3
|
|
}
|
|
|
|
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vldrw.u32 q0, [q1, #80]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
|
|
%2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
|
|
store <4 x i32> %2, <4 x i32>* %addr, align 8
|
|
%3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
|
|
ret <4 x i32> %3
|
|
}
|
|
|
|
declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vldrw.u32 q0, [q1, #480]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
|
|
%2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
|
|
store <4 x i32> %2, <4 x i32>* %addr, align 8
|
|
%3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
|
|
ret <4 x i32> %3
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q0, [q1, #-352]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
|
|
%4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
|
|
store <4 x i32> %4, <4 x i32>* %addr, align 8
|
|
%5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
|
|
ret <4 x float> %5
|
|
}
|
|
|
|
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
|
|
%4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
|
|
store <4 x i32> %4, <4 x i32>* %addr, align 8
|
|
%5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
|
|
ret <4 x i32> %5
|
|
}
|
|
|
|
declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
|
|
%4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
|
|
store <4 x i32> %4, <4 x i32>* %addr, align 8
|
|
%5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
|
|
ret <4 x i32> %5
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [q0, #-300]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
|
|
ret <4 x float> %2
|
|
}
|
|
|
|
declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [q0, #440]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [q0, #300]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
|
|
ret <4 x float> %0
|
|
}
|
|
|
|
declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
|
|
ret <4 x float> %2
|
|
}
|
|
|
|
declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
|
|
ret <4 x float> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
|
|
ret <4 x float> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
|
|
ret <4 x i32> %2
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.8 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
|
|
; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrb.8 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [q0, #888]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [q0, #264]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [q0, #408]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [q0, #-472]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q0, [q1, #248]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
|
|
store <2 x i64> %3, <2 x i64>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q0, [q1, #136]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2)
|
|
store <2 x i64> %3, <2 x i64>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vstrd.64 q0, [q1, #208]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
|
|
store <2 x i64> %1, <2 x i64>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vstrd.64 q0, [q1, #-168]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <2 x i64>, <2 x i64>* %addr, align 8
|
|
%1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
|
|
store <2 x i64> %1, <2 x i64>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
|
|
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [q0, #380]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [q0, #-400]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [q0, #48]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [q0, #-376]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [q0, #156]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [q0, #212]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vstrw.32 q0, [q1, #-412]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
|
|
store <4 x i32> %1, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q0, [q1, #236]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
|
|
store <4 x i32> %3, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q0, [q1, #328]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
|
|
store <4 x i32> %3, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q0, [q1, #412]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = zext i16 %p to i32
|
|
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
|
%3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
|
|
store <4 x i32> %3, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vstrw.32 q0, [q1, #-152]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
|
|
store <4 x i32> %1, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
|
; CHECK-NEXT: vstrw.32 q0, [q1, #64]!
|
|
; CHECK-NEXT: vstrw.32 q1, [r0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = load <4 x i32>, <4 x i32>* %addr, align 8
|
|
%1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
|
|
store <4 x i32> %1, <4 x i32>* %addr, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32)
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmsr p0, r1
|
|
; CHECK-NEXT: vpst
|
|
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = zext i16 %p to i32
|
|
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
|
|
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
|
|
ret void
|
|
}
|