[ARM,MVE] Add intrinsics for FP rounding operations.

Summary: This adds the unpredicated forms of six different MVE intrinsics which all round a vector of floating-point numbers to integer values, leaving them still in FP format, differing only in rounding mode and exception settings. Five of them map to existing target-independent intrinsics in LLVM IR, such as @llvm.trunc and @llvm.rint. The sixth, mapping to the `vrintn` instruction, is done by inventing a target-specific intrinsic. (`vrintn` behaves the same as `vrintx` in terms of the output value: the side effects on the FPSCR flags are the only difference between the two. But ACLE specifies separate user-callable intrinsics for the two, so the side effects matter enough to make sure we generate the right one of the two instructions in each case.) Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74333
2024-11-23 03:02:36 +01:00 · 2020-02-17 17:05:13 +00:00 · 2020-02-17 17:05:13 +00:00 · 2d913ae276
commit 2d913ae276
parent 4d0e92ae7b
3 changed files with 33 additions and 0 deletions
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@ -1158,4 +1158,8 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated<
  [llvm_anyvector_ty /* output */], [llvm_i32_ty],
  [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
  LLVMMatchType<0>, llvm_anyvector_ty>;
+
+def int_arm_mve_vrintn: Intrinsic<
+  [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
 } // end TargetPrefix
--- a/lib/Target/ARM/ARMInstrMVE.td
+++ b/lib/Target/ARM/ARMInstrMVE.td
@ -3179,6 +3179,10 @@ let Predicates = [HasMVEFloat] in {
            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
  def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
 }

 class MVEFloatArithNeon<string iname, string suffix, bit size,
--- a/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
+++ b/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndnq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrintn.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> %a)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndnq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vrndnq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrintn.f32 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a)
+  ret <4 x float> %0
+}
+
+declare <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float>)