llvm-mirror/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll
Simon Tatham (86da1af72a): [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family.
Summary:
These instructions make a vector of `<4 x float>` by widening every
other lane of a vector of `<8 x half>`.

I wondered about representing these using standard IR, along the lines
of a shufflevector to extract elements of the input into a `<4 x half>`
followed by an `fpext` to turn that into `<4 x float>`. But it looks as
if it would take a lot of work in isel lowering to match any pattern I
could sensibly write in Tablegen. Also, I haven't been able to think of
any other case where that pattern might be generated in IR, so there
wouldn't be any extra code-generation win from doing it that way.
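
For concreteness, a minimal sketch of that alternative standard-IR form (illustrative only: the function name is made up, and this is not what the patch actually emits). Even-numbered lanes correspond to the bottom-half (VCVTB) flavour; a mask of 1, 3, 5, 7 would give the top-half (VCVTT) flavour:

```llvm
; Hypothetical standard-IR form of the bottom-lane widening conversion:
; pull out every even-numbered half lane, then widen to single precision.
define <4 x float> @widen_bottom_lanes_sketch(<8 x half> %a) {
  %bottom = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %wide = fpext <4 x half> %bottom to <4 x float>
  ret <4 x float> %wide
}
```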

Therefore, I've just used another target-specific intrinsic. We can
always change it to the other way later if anyone thinks of a good
reason.
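
For reference, this is the shape of the unpredicated widening intrinsic as declared and exercised by the tests in this file; the trailing i32 selects the bottom (0, VCVTB) or top (1, VCVTT) half lanes. The wrapper function name here is invented for illustration:

```llvm
declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32)

define <4 x float> @widen_bottom_example(<8 x half> %a) {
  ; i32 0 selects the even-numbered (bottom) half lanes, i.e. VCVTB.F32.F16
  %r = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
  ret <4 x float> %r
}
```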

(In order to put the intrinsic definition near similar things in
`IntrinsicsARM.td`, I've also lifted the definition of the
`MVEMXPredicated` multiclass higher up the file, without changing it.)

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75254
2020-03-02 10:33:30 +00:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half>, <4 x float>, i32)
declare <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half>, <4 x float>, i32, <4 x i1>)
declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32)
declare <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float>, <8 x half>, i32, <4 x i1>)
declare <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32, <8 x i16>, i32)
declare <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32, <4 x i32>, i32)
declare <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32, <8 x half>, i32)
declare <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32, <4 x float>, i32)
declare <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32, <8 x half>, <8 x i16>, i32, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32, <4 x float>, <4 x i32>, i32, <4 x i1>)
declare <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, i32, <4 x i1>)

define arm_aapcs_vfpcc <8 x half> @test_vcvttq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvttq_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 1)
ret <8 x half> %0
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvtbq_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtb.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 0)
ret <8 x half> %0
}
define arm_aapcs_vfpcc <8 x half> @test_vcvttq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvttt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 1, <4 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtbt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 0, <4 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f16.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
%0 = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> %a, i32 1)
ret <8 x half> %0
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
%0 = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> %a, i32 2)
ret <8 x half> %0
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
%0 = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> %a, i32 3)
ret <4 x float> %0
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.u32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
%0 = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> %a, i32 32)
ret <4 x float> %0
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
%0 = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> %a, i32 1)
ret <8 x i16> %0
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
%0 = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> %a, i32 2)
ret <8 x i16> %0
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
%0 = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> %a, i32 3)
ret <4 x i32> %0
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
%0 = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> %a, i32 32)
ret <4 x i32> %0
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.s16 q0, q1, #1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> %inactive, <8 x i16> %a, i32 1, <8 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.u16 q0, q1, #2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> %inactive, <8 x i16> %a, i32 2, <8 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.s32 q0, q1, #3
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> %inactive, <4 x i32> %a, i32 3, <4 x i1> %1)
ret <4 x float> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.u32 q0, q1, #32
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> %inactive, <4 x i32> %a, i32 32, <4 x i1> %1)
ret <4 x float> %2
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s16.f16 q0, q1, #1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, i32 1, <8 x i1> %1)
ret <8 x i16> %2
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u16.f16 q0, q1, #2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, i32 2, <8 x i1> %1)
ret <8 x i16> %2
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s32.f32 q0, q1, #3
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, i32 3, <4 x i1> %1)
ret <4 x i32> %2
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u32.f32 q0, q1, #32
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, i32 32, <4 x i1> %1)
ret <4 x i32> %2
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> %a, i32 1, <8 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> %a, i32 2, <8 x i1> %1)
ret <8 x half> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> %a, i32 3, <4 x i1> %1)
ret <4 x float> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.u32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> %a, i32 32, <4 x i1> %1)
ret <4 x float> %2
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, i32 1, <8 x i1> %1)
ret <8 x i16> %2
}
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, i32 2, <8 x i1> %1)
ret <8 x i16> %2
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s32.f32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, i32 3, <4 x i1> %1)
ret <4 x i32> %2
}
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u32.f32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, i32 32, <4 x i1> %1)
ret <4 x i32> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtbq_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtb.f32.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
ret <4 x float> %0
}
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvttq_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtt.f32.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 1)
ret <4 x float> %0
}
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtbt.f32.f16 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 0, <4 x i1> %1)
ret <4 x float> %2
}
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvttt.f32.f16 q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 1, <4 x i1> %1)
ret <4 x float> %2
}