[ARM] MVE floating point compares and selects

Much like integers, this adds MVE floating point compares and selects. It requires a lot more buildvector/shuffle code because we may need to expand the compares without mve.fp, and it requires support for and/or because of the way we lower LLVM condition codes. Some original code by David Sherwood. Differential Revision: https://reviews.llvm.org/D65054 llvm-svn: 366909
2025-01-31 20:51:52 +01:00 · 2019-07-24 14:28:22 +00:00 · 2019-07-24 14:28:22 +00:00 · 41233e3473
commit 41233e3473
parent 07500b5f5e
5 changed files with 6743 additions and 1 deletions
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -288,6 +288,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+    setOperationAction(ISD::SETCC, VT, Custom);

    if (HasMVEFP) {
      setOperationAction(ISD::FMINNUM, VT, Legal);
@ -346,6 +347,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
  }
 }
@ -5895,6 +5897,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
    if (Op.getValueType().getVectorElementType() != MVT::i1)
      return SDValue();

+    // Make sure we expand floating point setcc to scalar if we do not have
+    // mve.fp, so that we can handle them from there.
+    if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
+      return SDValue();
+
    CmpVT = VT;
  }

@ -5925,7 +5932,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal FP comparison");
    case ISD::SETUNE:
-    case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
+    case ISD::SETNE:
+      if (ST->hasMVEFloatOps()) {
+        Opc = ARMISD::VCNE; break;
+      } else {
+        Invert = true; LLVM_FALLTHROUGH;
+      }
    case ISD::SETOEQ:
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETOLT:
--- a/lib/Target/ARM/ARMInstrMVE.td
+++ b/lib/Target/ARM/ARMInstrMVE.td
@ -3000,6 +3000,20 @@ multiclass unpred_vcmp_r<SDPatternOperator opnode, string suffix, int fc> {
                (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
 }

+multiclass unpred_vcmpf_z<SDPatternOperator opnode, int fc> {
+  def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1))),
+                (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
+  def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1))),
+                (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
+}
+
+multiclass unpred_vcmpf_r<SDPatternOperator opnode, int fc> {
+  def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
+                (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
+  def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
+                (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
+}
+
 let Predicates = [HasMVEInt] in {
  defm MVE_VCEQZ  : unpred_vcmp_z<ARMvceqz, "i", 0>;
  defm MVE_VCNEZ  : unpred_vcmp_z<ARMvcnez, "i", 1>;
@ -3016,6 +3030,20 @@ let Predicates = [HasMVEInt] in {
  defm MVE_VCGEU  : unpred_vcmp_r<ARMvcgeu, "u", 2>;
 }

+let Predicates = [HasMVEFloat] in {
+  defm MVE_VFCEQZ  : unpred_vcmpf_z<ARMvceqz, 0>;
+  defm MVE_VFCNEZ  : unpred_vcmpf_z<ARMvcnez, 1>;
+  defm MVE_VFCLEZ  : unpred_vcmpf_z<ARMvclez, 13>;
+  defm MVE_VFCGTZ  : unpred_vcmpf_z<ARMvcgtz, 12>;
+  defm MVE_VFCLTZ  : unpred_vcmpf_z<ARMvcltz, 11>;
+  defm MVE_VFCGEZ  : unpred_vcmpf_z<ARMvcgez, 10>;
+
+  defm MVE_VFCGT   : unpred_vcmpf_r<ARMvcgt, 12>;
+  defm MVE_VFCGE   : unpred_vcmpf_r<ARMvcge, 10>;
+  defm MVE_VFCEQ   : unpred_vcmpf_r<ARMvceq, 0>;
+  defm MVE_VFCNE   : unpred_vcmpf_r<ARMvcne, 1>;
+}
+

 // Extra "worst case" and/or/xor patterns, going into and out of GRP
 multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
@ -4457,6 +4485,11 @@ let Predicates = [HasMVEInt] in {
  def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
            (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;

+  def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
+            (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+  def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
+            (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+
  def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
            (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
                              (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
@ -4467,6 +4500,13 @@ let Predicates = [HasMVEInt] in {
            (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
                              (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;

+  def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
+            (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+                              (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+  def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
+            (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
+                              (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+
  def : Pat<(v16i8 (zext  (v16i1 VCCR:$pred))),
            (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
  def : Pat<(v8i16 (zext  (v8i1  VCCR:$pred))),
--- a/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/test/CodeGen/Thumb2/mve-vcmpf.ll
--- a/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/test/CodeGen/Thumb2/mve-vcmpfz.ll
--- a/test/CodeGen/Thumb2/mve-vpsel.ll
+++ b/test/CodeGen/Thumb2/mve-vpsel.ll
@ -37,6 +37,30 @@ entry:
  ret <4 x i32> %1
 }

+define arm_aapcs_vfpcc <8 x half> @vpsel_f16(<8 x i1> *%mask, <8 x half> %src1, <8 x half> %src2) {
+; CHECK-LABEL: vpsel_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <8 x i1>, <8 x i1>* %mask, align 4
+  %1 = select <8 x i1> %0, <8 x half> %src1, <8 x half> %src2
+  ret <8 x half> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @vpsel_f32(<4 x i1> *%mask, <4 x float> %src1, <4 x float> %src2) {
+; CHECK-LABEL: vpsel_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <4 x i1>, <4 x i1>* %mask, align 4
+  %1 = select <4 x i1> %0, <4 x float> %src1, <4 x float> %src2
+  ret <4 x float> %1
+}
+
 define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) {
 ; CHECK-LABEL: foo:
 ; CHECK:       @ %bb.0: