1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-abs.ll
David Green 9ee7075ca5 [ARM] Match dual lane vmovs from insert_vector_elt
MVE has a dual lane vector move instruction, capable of moving two
general purpose registers into lanes of a vector register. They look
like one of:
  vmov q0[2], q0[0], r2, r0
  vmov q0[3], q0[1], r3, r1
They only accept these lane indices though (and only insert into an
i32), either moving lanes 1 and 3, or 0 and 2.

This patch adds some tablegen patterns for them, selecting from vector
insert elements. Because the insert_elements are known to be
canonicalized to ascending order there are several patterns that we need
to select. These lane indices are:

3 2 1 0    -> vmovqrr 31; vmovqrr 20
3 2 1      -> vmovqrr 31; vmov 2
3 1        -> vmovqrr 31
2 1 0      -> vmovqrr 20; vmov 1
2 0        -> vmovqrr 20

With the top one being the most common. All other potential patterns of
lane indices will be matched by a combination of these and the
individual vmov pattern already present. This does mean that we are
selecting several machine instructions at once due to the need to
re-arrange the inserts, but in this case there is nothing else that will
attempt to match an insert_vector_elt node.

This is a recommit of 6cc3d80a84884a79967fffa4596c14001b8ba8a3 after
fixing the backward instruction definitions.
2020-12-18 16:13:08 +00:00

64 lines
2.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Vector integer abs written as the canonical icmp/sub/select idiom:
;   abs(x) = (x < 0) ? (0 - x) : x
; MVE has a native vector absolute value, so this folds to one vabs.s8.
define arm_aapcs_vfpcc <16 x i8> @abs_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: abs_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vabs.s8 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <16 x i8> %s1, zeroinitializer
%1 = sub nsw <16 x i8> zeroinitializer, %s1
%2 = select <16 x i1> %0, <16 x i8> %1, <16 x i8> %s1
ret <16 x i8> %2
}
; Same icmp/sub/select abs idiom at i16 element width; expected to
; select a single vabs.s16.
define arm_aapcs_vfpcc <8 x i16> @abs_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: abs_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vabs.s16 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <8 x i16> %s1, zeroinitializer
%1 = sub nsw <8 x i16> zeroinitializer, %s1
%2 = select <8 x i1> %0, <8 x i16> %1, <8 x i16> %s1
ret <8 x i16> %2
}
; Same icmp/sub/select abs idiom at i32 element width; expected to
; select a single vabs.s32.
define arm_aapcs_vfpcc <4 x i32> @abs_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: abs_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vabs.s32 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <4 x i32> %s1, zeroinitializer
%1 = sub nsw <4 x i32> zeroinitializer, %s1
%2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
ret <4 x i32> %2
}
; i64 abs idiom. MVE has no 64-bit vector abs, so codegen extracts both
; i64 lanes into GPR pairs, computes abs with the branch-free
; add/adc/eor sign-mask sequence (x + (x >> 63)) ^ (x >> 63), and then
; rebuilds the vector. The rebuild is the interesting part of this test:
; it should use the dual-lane moves
;   vmov q0[2], q0[0], r2, r1   ; low 32 bits of both results
;   vmov q0[3], q0[1], r1, r0   ; high 32 bits of both results
; matched by the insert_vector_elt patterns this test covers.
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: abs_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
; CHECK-NEXT: adc.w r12, r0, r0, asr #31
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
; CHECK-NEXT: adds.w r2, r2, r3, asr #31
; CHECK-NEXT: eor.w r0, r12, r0, asr #31
; CHECK-NEXT: eor.w r2, r2, r3, asr #31
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
; CHECK-NEXT: adc.w r1, r3, r3, asr #31
; CHECK-NEXT: eor.w r1, r1, r3, asr #31
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <2 x i64> %s1, zeroinitializer
%1 = sub nsw <2 x i64> zeroinitializer, %s1
%2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
ret <2 x i64> %2
}