mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
86da1af72a
Summary: These instructions make a vector of `<4 x float>` by widening every other lane of a vector of `<8 x half>`. I wondered about representing these using standard IR, along the lines of a shufflevector to extract elements of the input into a `<4 x half>` followed by an `fpext` to turn that into `<4 x float>`. But it looks as if that would take a lot of work in isel lowering to make it match any pattern I could sensibly write in Tablegen, and also I haven't been able to think of any other case where that pattern might be generated in IR, so there wouldn't be any extra code generation win from doing it that way. Therefore, I've just used another target-specific intrinsic. We can always change it to the other way later if anyone thinks of a good reason. (In order to put the intrinsic definition near similar things in `IntrinsicsARM.td`, I've also lifted the definition of the `MVEMXPredicated` multiclass higher up the file, without changing it.) Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75254
420 lines
16 KiB
LLVM
420 lines
16 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
|
|
|
|
; Declarations of the target intrinsics called by the test functions below,
; grouped by family. Declaration order carries no meaning.

; Scalar i32 to predicate-vector conversions.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; <4 x float> into <8 x half> narrowing, plain and predicated.
declare <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half>, <4 x float>, i32)
declare <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half>, <4 x float>, i32, <4 x i1>)

; <8 x half> into <4 x float> widening, plain and predicated.
declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32)
declare <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float>, <8 x half>, i32, <4 x i1>)

; Conversions taking an immediate operand (printed as "#n" in the expected
; assembly); each plain form is followed by its predicated counterpart.
declare <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32, <8 x i16>, i32)
declare <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32, <8 x half>, <8 x i16>, i32, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32, <4 x i32>, i32)
declare <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32, <4 x float>, <4 x i32>, i32, <4 x i1>)
declare <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32, <8 x half>, i32)
declare <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32, <4 x float>, i32)
declare <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, i32, <4 x i1>)
|
|
|
|
; Plain vcvt.narrow call with a trailing i32 operand of 1; the expected
; output is a single vcvtt.f16.f32 from q1 into q0.
define arm_aapcs_vfpcc <8 x half> @test_vcvttq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvttq_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 1)
  ret <8 x half> %narrowed
}
|
|
|
|
; Plain vcvt.narrow call with a trailing i32 operand of 0; the expected
; output is a single vcvtb.f16.f32 from q1 into q0.
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_f16_f32(<8 x half> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcvtbq_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtb.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %narrowed = tail call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> %a, <4 x float> %b, i32 0)
  ret <8 x half> %narrowed
}
|
|
|
|
; Predicated vcvt.narrow with a trailing selector of 1. The i16 mask %p is
; zero-extended and turned into a <4 x i1> predicate; the expected output
; moves it to p0, opens a VPT block and issues vcvttt.f16.f32.
define arm_aapcs_vfpcc <8 x half> @test_vcvttq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvttt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %narrowed = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 1, <4 x i1> %pred)
  ret <8 x half> %narrowed
}
|
|
|
|
; Predicated vcvt.narrow with a trailing selector of 0. The i16 mask %p is
; zero-extended and turned into a <4 x i1> predicate; the expected output
; moves it to p0, opens a VPT block and issues vcvtbt.f16.f32.
define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f16_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtbt.f16.f32 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %narrowed = tail call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> %a, <4 x float> %b, i32 0, <4 x i1> %pred)
  ret <8 x half> %narrowed
}
|
|
|
|
; vcvt.fix from <8 x i16> to <8 x half> with leading flag 0 and immediate 1;
; the expected output is vcvt.f16.s16 with "#1".
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f16.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 0, <8 x i16> %a, i32 1)
  ret <8 x half> %cvt
}
|
|
|
|
; vcvt.fix from <8 x i16> to <8 x half> with leading flag 1 and immediate 2;
; the expected output is vcvt.f16.u16 with "#2".
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32 1, <8 x i16> %a, i32 2)
  ret <8 x half> %cvt
}
|
|
|
|
; vcvt.fix from <4 x i32> to <4 x float> with leading flag 0 and immediate 3;
; the expected output is vcvt.f32.s32 with "#3".
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 0, <4 x i32> %a, i32 3)
  ret <4 x float> %cvt
}
|
|
|
|
; vcvt.fix from <4 x i32> to <4 x float> with leading flag 1 and immediate 32;
; the expected output is vcvt.f32.u32 with "#32".
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vcvtq_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.u32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32 1, <4 x i32> %a, i32 32)
  ret <4 x float> %cvt
}
|
|
|
|
; vcvt.fix from <8 x half> to <8 x i16> with leading flag 0 and immediate 1;
; the expected output is vcvt.s16.f16 with "#1".
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 0, <8 x half> %a, i32 1)
  ret <8 x i16> %cvt
}
|
|
|
|
; vcvt.fix from <8 x half> to <8 x i16> with leading flag 1 and immediate 2;
; the expected output is vcvt.u16.f16 with "#2".
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.v8i16.v8f16(i32 1, <8 x half> %a, i32 2)
  ret <8 x i16> %cvt
}
|
|
|
|
; vcvt.fix from <4 x float> to <4 x i32> with leading flag 0 and immediate 3;
; the expected output is vcvt.s32.f32 with "#3".
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 0, <4 x float> %a, i32 3)
  ret <4 x i32> %cvt
}
|
|
|
|
; vcvt.fix from <4 x float> to <4 x i32> with leading flag 1 and immediate 32;
; the expected output is vcvt.u32.f32 with "#32".
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtq_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.v4i32.v4f32(i32 1, <4 x float> %a, i32 32)
  ret <4 x i32> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x i16> to <8 x half>, flag 0, immediate 1, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.f16.s16 q0, q1, #1.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.s16 q0, q1, #1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> %inactive, <8 x i16> %a, i32 1, <8 x i1> %pred)
  ret <8 x half> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x i16> to <8 x half>, flag 1, immediate 2, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.f16.u16 q0, q1, #2.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_n_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.u16 q0, q1, #2
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> %inactive, <8 x i16> %a, i32 2, <8 x i1> %pred)
  ret <8 x half> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x i32> to <4 x float>, flag 0, immediate 3, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.f32.s32 q0, q1, #3.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.s32 q0, q1, #3
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> %inactive, <4 x i32> %a, i32 3, <4 x i1> %pred)
  ret <4 x float> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x i32> to <4 x float>, flag 1, immediate 32, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.f32.u32 q0, q1, #32.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_n_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.u32 q0, q1, #32
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> %inactive, <4 x i32> %a, i32 32, <4 x i1> %pred)
  ret <4 x float> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x half> to <8 x i16>, flag 0, immediate 1, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.s16.f16 q0, q1, #1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s16.f16 q0, q1, #1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, i32 1, <8 x i1> %pred)
  ret <8 x i16> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x half> to <8 x i16>, flag 1, immediate 2, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.u16.f16 q0, q1, #2.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_n_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u16.f16 q0, q1, #2
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, i32 2, <8 x i1> %pred)
  ret <8 x i16> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x float> to <4 x i32>, flag 0, immediate 3, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.s32.f32 q0, q1, #3.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s32.f32 q0, q1, #3
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, i32 3, <4 x i1> %pred)
  ret <4 x i32> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x float> to <4 x i32>, flag 1, immediate 32, with
; %inactive passed as the second operand. The expected output loads p0 from
; r0, opens a VPT block and issues vcvtt.u32.f32 q0, q1, #32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_n_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_m_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u32.f32 q0, q1, #32
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, i32 32, <4 x i1> %pred)
  ret <4 x i32> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x i16> to <8 x half>, flag 0, immediate 1, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.f16.s16 q0, q0, #1 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 0, <8 x half> undef, <8 x i16> %a, i32 1, <8 x i1> %pred)
  ret <8 x half> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x i16> to <8 x half>, flag 1, immediate 2, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.f16.u16 q0, q0, #2 inside a VPT block.
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_x_n_f16_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f16_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x half> @llvm.arm.mve.vcvt.fix.predicated.v8f16.v8i16.v8i1(i32 1, <8 x half> undef, <8 x i16> %a, i32 2, <8 x i1> %pred)
  ret <8 x half> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x i32> to <4 x float>, flag 0, immediate 3, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.f32.s32 q0, q0, #3 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 0, <4 x float> undef, <4 x i32> %a, i32 3, <4 x i1> %pred)
  ret <4 x float> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x i32> to <4 x float>, flag 1, immediate 32, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.f32.u32 q0, q0, #32 inside a VPT block.
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_x_n_f32_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_f32_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.f32.u32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x float> @llvm.arm.mve.vcvt.fix.predicated.v4f32.v4i32.v4i1(i32 1, <4 x float> undef, <4 x i32> %a, i32 32, <4 x i1> %pred)
  ret <4 x float> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x half> to <8 x i16>, flag 0, immediate 1, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.s16.f16 q0, q0, #1 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, i32 1, <8 x i1> %pred)
  ret <8 x i16> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <8 x half> to <8 x i16>, flag 1, immediate 2, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.u16.f16 q0, q0, #2 inside a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_x_n_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u16_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask)
  %cvt = call <8 x i16> @llvm.arm.mve.vcvt.fix.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, i32 2, <8 x i1> %pred)
  ret <8 x i16> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x float> to <4 x i32>, flag 0, immediate 3, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.s32.f32 q0, q0, #3 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_s32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.s32.f32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, i32 3, <4 x i1> %pred)
  ret <4 x i32> %cvt
}
|
|
|
|
; Predicated vcvt.fix, <4 x float> to <4 x i32>, flag 1, immediate 32, with
; undef as the second operand. The expected output uses q0 as both
; destination and source: vcvtt.u32.f32 q0, q0, #32 inside a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_x_n_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtq_x_n_u32_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtt.u32.f32 q0, q0, #32
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %cvt = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, i32 32, <4 x i1> %pred)
  ret <4 x i32> %cvt
}
|
|
|
|
; Plain vcvt.widen call with a trailing i32 operand of 0; the expected
; output is a single vcvtb.f32.f16 operating on q0.
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtbq_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtb.f32.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %widened = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
  ret <4 x float> %widened
}
|
|
|
|
; Plain vcvt.widen call with a trailing i32 operand of 1; the expected
; output is a single vcvtt.f32.f16 operating on q0.
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_f32_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvttq_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvtt.f32.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %widened = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 1)
  ret <4 x float> %widened
}
|
|
|
|
; Predicated vcvt.widen with a selector of 0 and %inactive as the first
; operand. The i16 mask %p is zero-extended and turned into a <4 x i1>
; predicate; the expected output loads p0, opens a VPT block and issues
; vcvtbt.f32.f16 q0, q1.
define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtbq_m_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvtbt.f32.f16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %widened = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 0, <4 x i1> %pred)
  ret <4 x float> %widened
}
|
|
|
|
; Predicated vcvt.widen with a selector of 1 and %inactive as the first
; operand. The i16 mask %p is zero-extended and turned into a <4 x i1>
; predicate; the expected output loads p0, opens a VPT block and issues
; vcvttt.f32.f16 q0, q1.
define arm_aapcs_vfpcc <4 x float> @test_vcvttq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvttq_m_f32_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vcvttt.f32.f16 q0, q1
; CHECK-NEXT: bx lr
entry:
  %mask = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask)
  %widened = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 1, <4 x i1> %pred)
  ret <4 x float> %widened
}
|