Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-23 11:13:28 +01:00
Commit 0c599cd301:

This fills in the gaps for basic MVE loads and stores, allowing unaligned
access and adding far too many tests. These will become important as
narrowing/expanding and pre/post-increment addressing are added. Big endian
might still not be handled very well, because we have not yet added bitcasts
(and I'm not sure how we want it to work yet). I've included the alignment
code anyway, which maps onto our current patterns. We plan to return to that
later.

Code written by Simon Tatham, with additional tests from me and Mikhail Maltsev.

Differential Revision: https://reviews.llvm.org/D63838

llvm-svn: 364633
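Since the message singles out unaligned access, a minimal sketch of the kind of
test this implies is shown below; the function name, the 1-byte alignment, and
the expected byte-wise lowering are illustrative assumptions, not lines taken
from this file.

; Illustrative only (assumption): an unaligned <4 x i32> copy in the same style
; as the file below. With only 1-byte alignment the word-sized vldrw.u32/vstrw.32
; forms cannot be relied on, so a byte-wise form such as vldrb.u8/vstrb.8 would
; be the expected lowering.
define void @copy_v4i32_align1(<4 x i32>* %src, <4 x i32>* %dst) {
  %v = load <4 x i32>, <4 x i32>* %src, align 1
  store <4 x i32> %v, <4 x i32>* %dst, align 1
  ret void
}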
67 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -o - %s | FileCheck %s

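; Regenerating the CHECK lines below is normally done with the script named in
; the NOTE above; the exact invocation is an assumption about the usual
; workflow, not taken from this file:
;   utils/update_llc_test_checks.py --llc-binary <path-to-llc> <this-test>.ll
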
; Inline-asm vadd.i32 on by-value <4 x i32> arguments: under arm_aapcs_vfpcc the
; vectors are passed and returned in q registers, so no loads or stores appear.
define arm_aapcs_vfpcc <4 x i32> @vector_add_by_value(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: vector_add_by_value:
; CHECK: @ %bb.0:
; CHECK-NEXT: @APP
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: bx lr
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}

; The same inline-asm add, but with operands loaded from and the result stored
; to memory: aligned accesses should use vldrw.u32 and vstrw.32.
define void @vector_add_by_reference(<4 x i32>* %resultp, <4 x i32>* %lhsp, <4 x i32>* %rhsp) {
; CHECK-LABEL: vector_add_by_reference:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: @APP
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
  %lhs = load <4 x i32>, <4 x i32>* %lhsp, align 16
  %rhs = load <4 x i32>, <4 x i32>* %rhsp, align 16
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  store <4 x i32> %result, <4 x i32>* %resultp, align 16
  ret void
}

; A <2 x double> copy also goes through a q register, reusing the same
; vldrw.u32/vstrw.32 forms for the aligned load and store.
define void @vector_f64_copy(<2 x double>* %from, <2 x double>* %to) {
; CHECK-LABEL: vector_f64_copy:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
  %v = load <2 x double>, <2 x double>* %from, align 16
  store <2 x double> %v, <2 x double>* %to, align 16
  ret void
}

; At optnone the argument is stored to and reloaded from a stack slot, checking
; that MVE stores and loads to a re-aligned stack slot are handled.
define arm_aapcs_vfpcc <16 x i8> @stack_slot_handling(<16 x i8> %a) #0 {
; CHECK-LABEL: stack_slot_handling:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, r6, r7, lr}
; CHECK-NEXT: add r7, sp, #8
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: mov r4, sp
; CHECK-NEXT: bfc r4, #0, #4
; CHECK-NEXT: mov sp, r4
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: sub.w r4, r7, #8
; CHECK-NEXT: mov sp, r4
; CHECK-NEXT: pop {r4, r6, r7, pc}
entry:
  %a.addr = alloca <16 x i8>, align 8
  store <16 x i8> %a, <16 x i8>* %a.addr, align 8
  %0 = load <16 x i8>, <16 x i8>* %a.addr, align 8
  ret <16 x i8> %0
}

attributes #0 = { noinline optnone }