1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-intrinsics/vqmovn.ll
Simon Tatham 605803fc34 [ARM,MVE] Add ACLE intrinsics for VQMOV[U]N family.
Summary:
These instructions work like VMOVN (narrowing a vector of wide values
to half size, and overwriting every other lane of an output register
with the result), except that the narrowing conversion is saturating.
They come in three signedness flavours: signed to signed, unsigned to
unsigned, and signed to unsigned. All are represented in IR by a
target-specific intrinsic that takes two separate 'unsigned' flags.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75252
2020-03-02 10:33:30 +00:00

300 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated saturating narrow of %b (<8 x i16>) into the <16 x i8> vector
; %a. Codegen is expected to be a single `vqmovnb.s16 q0, q1` plus the return.
; The i32 immediates (0, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovnbq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnb.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
  ret <16 x i8> %narrowed
}
; Unpredicated saturating narrow of %b (<4 x i32>) into the <8 x i16> vector
; %a. Codegen is expected to be a single `vqmovnb.s32 q0, q1` plus the return.
; The i32 immediates (0, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovnbq_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnb.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
  ret <8 x i16> %narrowed
}
; Unpredicated saturating narrow of %b (<8 x i16>) into the <16 x i8> vector
; %a, unsigned flavour. Codegen is expected to be a single
; `vqmovnb.u16 q0, q1` plus the return.
; The i32 immediates (1, 1, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovnbq_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnb.u16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
  ret <16 x i8> %narrowed
}
; Unpredicated saturating narrow of %b (<4 x i32>) into the <8 x i16> vector
; %a, unsigned flavour. Codegen is expected to be a single
; `vqmovnb.u32 q0, q1` plus the return.
; The i32 immediates (1, 1, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovnbq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnb.u32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
  ret <8 x i16> %narrowed
}
; Unpredicated saturating narrow of %b (<8 x i16>) into the <16 x i8> vector
; %a, "t" form. Codegen is expected to be a single `vqmovnt.s16 q0, q1` plus
; the return.
; The i32 immediates (0, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovntq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
  ret <16 x i8> %narrowed
}
; Unpredicated saturating narrow of %b (<4 x i32>) into the <8 x i16> vector
; %a, "t" form. Codegen is expected to be a single `vqmovnt.s32 q0, q1` plus
; the return.
; The i32 immediates (0, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovntq_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
  ret <8 x i16> %narrowed
}
; Unpredicated saturating narrow of %b (<8 x i16>) into the <16 x i8> vector
; %a, unsigned "t" form. Codegen is expected to be a single
; `vqmovnt.u16 q0, q1` plus the return.
; The i32 immediates (1, 1, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovntq_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnt.u16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
  ret <16 x i8> %narrowed
}
; Unpredicated saturating narrow of %b (<4 x i32>) into the <8 x i16> vector
; %a, unsigned "t" form. Codegen is expected to be a single
; `vqmovnt.u32 q0, q1` plus the return.
; The i32 immediates (1, 1, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovntq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovnt.u32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
  ret <8 x i16> %narrowed
}
; Unpredicated VQMOVUN test: narrows %b (<8 x i16>) into the <16 x i8> vector
; %a. Codegen is expected to be a single `vqmovunb.s16 q0, q1` plus the
; return.
; The i32 immediates (1, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovunbq_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovunbq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovunb.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
  ret <16 x i8> %narrowed
}
; Unpredicated VQMOVUN test: narrows %b (<4 x i32>) into the <8 x i16> vector
; %a. Codegen is expected to be a single `vqmovunb.s32 q0, q1` plus the
; return.
; The i32 immediates (1, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovunbq_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovunbq_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovunb.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
  ret <8 x i16> %narrowed
}
; Unpredicated VQMOVUN test, "t" form: narrows %b (<8 x i16>) into the
; <16 x i8> vector %a. Codegen is expected to be a single
; `vqmovunt.s16 q0, q1` plus the return.
; The i32 immediates (1, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovuntq_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqmovuntq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovunt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
  ret <16 x i8> %narrowed
}
; Unpredicated VQMOVUN test, "t" form: narrows %b (<4 x i32>) into the
; <8 x i16> vector %a. Codegen is expected to be a single
; `vqmovunt.s32 q0, q1` plus the return.
; The i32 immediates (1, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovuntq_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqmovuntq_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqmovunt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
  ret <8 x i16> %narrowed
}
; Predicated form. The 16-bit mask %p is zero-extended to i32, converted to
; an <8 x i1> predicate, and passed as the last operand of the predicated
; intrinsic. Expected code: vmsr p0, r0 / vpst / vqmovnbt.s16 q0, q1.
; The i32 immediates (0, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovnbq_m_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovnbt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated form. The 16-bit mask %p is zero-extended to i32, converted to
; a <4 x i1> predicate, and passed as the last operand of the predicated
; intrinsic. Expected code: vmsr p0, r0 / vpst / vqmovnbt.s32 q0, q1.
; The i32 immediates (0, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovnbq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovnbt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Predicated form, unsigned flavour. The 16-bit mask %p is zero-extended to
; i32, converted to an <8 x i1> predicate, and passed as the last operand of
; the predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovnbt.u16 q0, q1.
; The i32 immediates (1, 1, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovnbq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovnbt.u16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated form, unsigned flavour. The 16-bit mask %p is zero-extended to
; i32, converted to a <4 x i1> predicate, and passed as the last operand of
; the predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovnbt.u32 q0, q1.
; The i32 immediates (1, 1, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovnbq_m_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovnbt.u32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Predicated "t" form. The 16-bit mask %p is zero-extended to i32, converted
; to an <8 x i1> predicate, and passed as the last operand of the predicated
; intrinsic. Expected code: vmsr p0, r0 / vpst / vqmovntt.s16 q0, q1.
; The i32 immediates (0, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovntq_m_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovntt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated "t" form. The 16-bit mask %p is zero-extended to i32, converted
; to a <4 x i1> predicate, and passed as the last operand of the predicated
; intrinsic. Expected code: vmsr p0, r0 / vpst / vqmovntt.s32 q0, q1.
; The i32 immediates (0, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovntq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovntt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Predicated unsigned "t" form. The 16-bit mask %p is zero-extended to i32,
; converted to an <8 x i1> predicate, and passed as the last operand of the
; predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovntt.u16 q0, q1.
; The i32 immediates (1, 1, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovntq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovntt.u16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated unsigned "t" form. The 16-bit mask %p is zero-extended to i32,
; converted to a <4 x i1> predicate, and passed as the last operand of the
; predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovntt.u32 q0, q1.
; The i32 immediates (1, 1, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors -- confirm against the intrinsic
; definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovntq_m_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovntt.u32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Predicated VQMOVUN test. The 16-bit mask %p is zero-extended to i32,
; converted to an <8 x i1> predicate, and passed as the last operand of the
; predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovunbt.s16 q0, q1.
; The i32 immediates (1, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovunbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovunbq_m_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovunbt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated VQMOVUN test. The 16-bit mask %p is zero-extended to i32,
; converted to a <4 x i1> predicate, and passed as the last operand of the
; predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovunbt.s32 q0, q1.
; The i32 immediates (1, 0, 0) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovunbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovunbq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovunbt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Predicated VQMOVUN test, "t" form. The 16-bit mask %p is zero-extended to
; i32, converted to an <8 x i1> predicate, and passed as the last operand of
; the predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovuntt.s16 q0, q1.
; The i32 immediates (1, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <16 x i8> @test_vqmovuntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovuntq_m_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovuntt.s16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %narrowed = tail call <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %pred)
  ret <16 x i8> %narrowed
}
; Predicated VQMOVUN test, "t" form. The 16-bit mask %p is zero-extended to
; i32, converted to a <4 x i1> predicate, and passed as the last operand of
; the predicated intrinsic.
; Expected code: vmsr p0, r0 / vpst / vqmovuntt.s32 q0, q1.
; The i32 immediates (1, 0, 1) are presumably the unsigned-result,
; unsigned-source and top-lane selectors (i.e. signed input, unsigned
; output) -- confirm against the intrinsic definition.
define arm_aapcs_vfpcc <8 x i16> @test_vqmovuntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqmovuntq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqmovuntt.s32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %narrowed = tail call <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %pred)
  ret <8 x i16> %narrowed
}
; Unpredicated narrowing intrinsics: (destination-width vector, wide source
; vector, three i32 immediates).
declare <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32)
declare <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32)

; Predicated variants: same operands followed by an <N x i1> predicate vector.
declare <16 x i8> @llvm.arm.mve.vqmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, i32, i32, <8 x i1>)
declare <8 x i16> @llvm.arm.mve.vqmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, i32, i32, <4 x i1>)

; Helpers that turn an i32 into an <N x i1> predicate vector for the
; predicated calls.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)