llvm-mirror/test/CodeGen/Thumb2/mve-basic.ll
David Green 0c599cd301 [ARM] MVE loads and stores
This fills in the gaps for basic MVE loads and stores, allowing unaligned
access and adding far too many tests. These will become important as
narrowing/expanding and pre/post-increment addressing are added. Big-endian
might still not be handled very well, because we have not yet added bitcasts
(and I'm not sure how we want that to work yet). I've included the alignment
code anyway, which maps to our current patterns. We plan to return to that
later.
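
As a rough sketch of the unaligned case this enables (the function below is
illustrative, not part of the committed test file, and the exact instruction
chosen may differ), an align-1 vector copy can now be selected as byte-wise
MVE loads/stores rather than being expanded:

; Hypothetical example: an align-1 access, expected to lower to something
; like vldrb.u8/vstrb.8 instead of an element-by-element expansion.
define void @copy_unaligned(<4 x i32>* %src, <4 x i32>* %dst) {
  %v = load <4 x i32>, <4 x i32>* %src, align 1
  store <4 x i32> %v, <4 x i32>* %dst, align 1
  ret void
}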

Code written by Simon Tatham, with additional tests from me and Mikhail Maltsev.

Differential Revision: https://reviews.llvm.org/D63838

llvm-svn: 364633
2019-06-28 08:41:40 +00:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -o - %s | FileCheck %s
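
; The vectors arrive and return in q0/q1 under arm_aapcs_vfpcc, so the
; inline-asm add should be emitted with no loads or stores around it.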
define arm_aapcs_vfpcc <4 x i32> @vector_add_by_value(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: vector_add_by_value:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    @APP
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    @NO_APP
; CHECK-NEXT:    bx lr
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}
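
; The same add with the operands passed by pointer: the compiler has to
; load both vectors with vldrw.u32 and store the result with vstrw.32.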
define void @vector_add_by_reference(<4 x i32>* %resultp, <4 x i32>* %lhsp, <4 x i32>* %rhsp) {
; CHECK-LABEL: vector_add_by_reference:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    @APP
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    @NO_APP
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i32>, <4 x i32>* %lhsp, align 16
  %rhs = load <4 x i32>, <4 x i32>* %rhsp, align 16
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  store <4 x i32> %result, <4 x i32>* %resultp, align 16
  ret void
}
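
; A 128-bit copy of <2 x double>: with 16-byte alignment this should
; select a single vldrw.u32/vstrw.32 pair (the word-sized forms are used
; even for f64 data).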
define void @vector_f64_copy(<2 x double>* %from, <2 x double>* %to) {
; CHECK-LABEL: vector_f64_copy:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
  %v = load <2 x double>, <2 x double>* %from, align 16
  store <2 x double> %v, <2 x double>* %to, align 16
  ret void
}
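
; Built at optnone so %a round-trips through a stack slot, exercising
; MVE stores and loads to a realigned sp-based address.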
define arm_aapcs_vfpcc <16 x i8> @stack_slot_handling(<16 x i8> %a) #0 {
; CHECK-LABEL: stack_slot_handling:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    push {r4, r6, r7, lr}
; CHECK-NEXT:    add r7, sp, #8
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    bfc r4, #0, #4
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    sub.w r4, r7, #8
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    pop {r4, r6, r7, pc}
entry:
  %a.addr = alloca <16 x i8>, align 8
  store <16 x i8> %a, <16 x i8>* %a.addr, align 8
  %0 = load <16 x i8>, <16 x i8>* %a.addr, align 8
  ret <16 x i8> %0
}

attributes #0 = { noinline optnone }