1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-neg.ll
David Green 9ee7075ca5 [ARM] Match dual lane vmovs from insert_vector_elt
MVE has a dual lane vector move instruction, capable of moving two
general purpose registers into lanes of a vector register. They look
like one of:
  vmov q0[2], q0[0], r2, r0
  vmov q0[3], q0[1], r3, r1
They only accept these lane indices though (and only insert into an
i32), either moving lanes 1 and 3, or 0 and 2.

This patch adds some tablegen patterns for them, selecting from vector
insert elements. Because the insert_elements are known to be
canonicalized to ascending order there are several patterns that we need
to select. These lane indices are:

3 2 1 0    -> vmovqrr 31; vmovqrr 20
3 2 1      -> vmovqrr 31; vmov 2
3 1        -> vmovqrr 31
2 1 0      -> vmovqrr 20; vmov 1
2 0        -> vmovqrr 20

With the top one being the most common. All other potential patterns of
lane indices will be matched by a combination of these and the
individual vmov pattern already present. This does mean that we are
selecting several machine instructions at once due to the need to
re-arrange the inserts, but in this case there is nothing else that will
attempt to match an insert_vector_elt node.

This is a recommit of 6cc3d80a84884a79967fffa4596c14001b8ba8a3 after
fixing the backward instruction definitions.
2020-12-18 16:13:08 +00:00

54 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Integer negation of a <16 x i8> vector. `sub nsw 0, x` is the canonical IR
; form of negation; it should select to a single MVE `vneg.s8 q0, q0`.
; CHECK lines below are autogenerated by update_llc_test_checks.py — do not
; hand-edit; regenerate instead.
define arm_aapcs_vfpcc <16 x i8> @neg_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: neg_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vneg.s8 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = sub nsw <16 x i8> zeroinitializer, %s1
ret <16 x i8> %0
}
; Integer negation of a <8 x i16> vector; expects a single `vneg.s16`.
; CHECK lines are autogenerated — regenerate rather than hand-edit.
define arm_aapcs_vfpcc <8 x i16> @neg_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: neg_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vneg.s16 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = sub nsw <8 x i16> zeroinitializer, %s1
ret <8 x i16> %0
}
; Integer negation of a <4 x i32> vector; expects a single `vneg.s32`.
; CHECK lines are autogenerated — regenerate rather than hand-edit.
define arm_aapcs_vfpcc <4 x i32> @neg_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: neg_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vneg.s32 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = sub nsw <4 x i32> zeroinitializer, %s1
ret <4 x i32> %0
}
; Integer negation of a <2 x i64> vector. MVE has no 64-bit `vneg`, so the
; expected lowering extracts the i32 halves to GPRs, negates via
; rsbs/sbc.w (64-bit 0 - x with borrow), and rebuilds the vector with the
; dual-lane moves this commit adds patterns for:
;   vmov q0[2], q0[0], ...  (lanes 0 and 2)
;   vmov q0[3], q0[1], ...  (lanes 1 and 3)
; CHECK lines are autogenerated — regenerate rather than hand-edit.
define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: neg_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: sbc.w r0, r12, r0
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: sbc.w r3, r12, r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
; CHECK-NEXT: vmov q0[3], q0[1], r3, r0
; CHECK-NEXT: bx lr
entry:
%0 = sub nsw <2 x i64> zeroinitializer, %s1
ret <2 x i64> %0
}