mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
2a0f923462
This adds support code for building and shuffling i1 predicate registers. It generally uses two basic principles, either converting the predicate into an scalar (through a PREDICATE_CAST) and doing scalar operations on it there, or by converting the register to an full vector register and back. Some of the code here is a not super efficient but will hopefully cover most cases of moving i1 vectors around and can be improved in subsequent patches. Some code by David Sherwood. Differential Revision: https://reviews.llvm.org/D65052 llvm-svn: 366890
197 lines
6.3 KiB
LLVM
197 lines
6.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
|
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @build_true_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|
; CHECK-LABEL: build_true_v4i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> %a, <4 x i32> %b
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @build_false_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|
; CHECK-LABEL: build_false_v4i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <4 x i1> <i1 0, i1 0, i1 0, i1 0>, <4 x i32> %a, <4 x i32> %b
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @build_upper_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|
; CHECK-LABEL: build_upper_v4i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: mov.w r0, #65280
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <4 x i1> <i1 0, i1 0, i1 1, i1 1>, <4 x i32> %a, <4 x i32> %b
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @build_lower_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|
; CHECK-LABEL: build_lower_v4i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: movs r0, #255
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %a, <4 x i32> %b
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @build_true_v8i1(<8 x i16> %a, <8 x i16> %b) {
|
|
; CHECK-LABEL: build_true_v8i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %a, <8 x i16> %b
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @build_false_v8i1(<8 x i16> %a, <8 x i16> %b) {
|
|
; CHECK-LABEL: build_false_v8i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %a, <8 x i16> %b
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @build_upper_v8i1(<8 x i16> %a, <8 x i16> %b) {
|
|
; CHECK-LABEL: build_upper_v8i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: mov.w r0, #65280
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %a, <8 x i16> %b
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @build_lower_v8i1(<8 x i16> %a, <8 x i16> %b) {
|
|
; CHECK-LABEL: build_lower_v8i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: movs r0, #255
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %a, <8 x i16> %b
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @build_true_v16i1(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: build_true_v16i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %a, <16 x i8> %b
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @build_false_v16i1(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: build_false_v16i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <16 x i8> %a, <16 x i8> %b
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @build_upper_v16i1(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: build_upper_v16i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: mov.w r0, #65280
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %a, <16 x i8> %b
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @build_lower_v16i1(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: build_lower_v16i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: movs r0, #255
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
; CHECK-NEXT: vpsel q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <16 x i8> %a, <16 x i8> %b
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @build_true_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|
; CHECK-LABEL: build_true_v2i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <2 x i1> <i1 1, i1 1>, <2 x i64> %a, <2 x i64> %b
|
|
ret <2 x i64> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @build_false_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|
; CHECK-LABEL: build_false_v2i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%s = select <2 x i1> <i1 0, i1 0>, <2 x i64> %a, <2 x i64> %b
|
|
ret <2 x i64> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @build_upper_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|
; CHECK-LABEL: build_upper_v2i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: adr r0, .LCPI14_0
|
|
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
|
; CHECK-NEXT: vbic q1, q1, q2
|
|
; CHECK-NEXT: vand q0, q0, q2
|
|
; CHECK-NEXT: vorr q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: @ %bb.1:
|
|
; CHECK-NEXT: .LCPI14_0:
|
|
; CHECK-NEXT: .long 0 @ 0x0
|
|
; CHECK-NEXT: .long 0 @ 0x0
|
|
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
|
|
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
|
|
entry:
|
|
%s = select <2 x i1> <i1 0, i1 1>, <2 x i64> %a, <2 x i64> %b
|
|
ret <2 x i64> %s
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <2 x i64> @build_lower_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|
; CHECK-LABEL: build_lower_v2i1:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: adr r0, .LCPI15_0
|
|
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
|
; CHECK-NEXT: vbic q1, q1, q2
|
|
; CHECK-NEXT: vand q0, q0, q2
|
|
; CHECK-NEXT: vorr q0, q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: @ %bb.1:
|
|
; CHECK-NEXT: .LCPI15_0:
|
|
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
|
|
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
|
|
; CHECK-NEXT: .long 0 @ 0x0
|
|
; CHECK-NEXT: .long 0 @ 0x0
|
|
entry:
|
|
%s = select <2 x i1> <i1 1, i1 0>, <2 x i64> %a, <2 x i64> %b
|
|
ret <2 x i64> %s
|
|
}
|