[AArch64][SVE] Add fixed length codegen for FP_TO_{S,U}INT/{S,U}INT_TO_FP

Depends on D102607 Differential Revision: https://reviews.llvm.org/D102777
2024-11-22 18:54:02 +01:00 · 2021-05-18 13:49:27 +01:00 · 2021-05-18 13:49:27 +01:00 · 1d41093ecb
commit 1d41093ecb
parent af1a311083
5 changed files with 3621 additions and 70 deletions
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -1482,6 +1482,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::FNEG, VT, Custom);
  setOperationAction(ISD::FP_EXTEND, VT, Custom);
  setOperationAction(ISD::FP_ROUND, VT, Custom);
+  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  setOperationAction(ISD::FRINT, VT, Custom);
  setOperationAction(ISD::FROUND, VT, Custom);
  setOperationAction(ISD::FROUNDEVEN, VT, Custom);
@ -1501,6 +1503,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::SHL, VT, Custom);
  setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
  setOperationAction(ISD::SMAX, VT, Custom);
  setOperationAction(ISD::SMIN, VT, Custom);
  setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@ -1510,6 +1513,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::SUB, VT, Custom);
  setOperationAction(ISD::TRUNCATE, VT, Custom);
  setOperationAction(ISD::UDIV, VT, Custom);
+  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
  setOperationAction(ISD::UMAX, VT, Custom);
  setOperationAction(ISD::UMIN, VT, Custom);
  setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@ -3260,6 +3264,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }

+  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
+    return LowerFixedLengthFPToIntToSVE(Op, DAG);
+
  unsigned NumElts = InVT.getVectorNumElements();

  // f16 conversions are promoted to f32 when full fp16 is not supported.
@ -3384,6 +3391,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }

+  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
+    return LowerFixedLengthIntToFPToSVE(Op, DAG);
+
  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVTSize = InVT.getFixedSizeInBits();
  if (VTSize < InVTSize) {
@ -17994,6 +18004,95 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
 }

+SDValue
+AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
+  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
+                             : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
+
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  EVT SrcVT = Val.getValueType();
+  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
+
+  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
+      ContainerDstVT.getVectorElementType().getSizeInBits()) {
+    SDValue Pg = getPredicateForVector(DAG, DL, VT);
+
+    Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+                      VT.changeTypeToInteger(), Val);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
+    // Safe to use a larger than specified operand since we just unpacked the
+    // data, hence the upper bits are zero.
+    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
+                      DAG.getUNDEF(ContainerDstVT));
+    return convertFromScalableVector(DAG, VT, Val);
+  } else {
+    EVT CvtVT = ContainerSrcVT.changeVectorElementType(
+        ContainerDstVT.getVectorElementType());
+    SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
+    Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
+    Val = convertFromScalableVector(DAG, SrcVT, Val);
+
+    Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
+    return DAG.getNode(ISD::BITCAST, DL, VT, Val);
+  }
+}
+
+SDValue
+AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
+  unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
+                             : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
+
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  EVT SrcVT = Val.getValueType();
+  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
+
+  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
+      ContainerDstVT.getVectorElementType().getSizeInBits()) {
+    EVT CvtVT = ContainerDstVT.changeVectorElementType(
+      ContainerSrcVT.getVectorElementType());
+    SDValue Pg = getPredicateForVector(DAG, DL, VT);
+
+    Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
+    Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = getSVESafeBitCast(CvtVT, Val, DAG);
+    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
+                      DAG.getUNDEF(ContainerDstVT));
+    return convertFromScalableVector(DAG, VT, Val);
+  } else {
+    EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
+    SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
+
+    // Safe to use a larger than specified result since an fp_to_int where the
+    // result doesn't fit into the destination is undefined.
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
+    Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
+
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
+  }
+}
+
 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@ -1005,6 +1005,8 @@ private:
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
--- a/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
+++ b/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
@ -1,70 +0,0 @@
-; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
-; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; Don't use SVE when its registers are no bigger than NEON.
-; NO_SVE-NOT: z{0-9}
-
-; NOTE: fptrunc operations bigger than NEON are expanded. These tests just
-; ensure we've correctly set the operation action for fixed length vector types
-; that require SVE. They'll be updated to protect their expected code generation
-; when lowering it implemented.
-
-;
-; vector uint_to_fp i8 -> f32
-; AArch64 doesn't have a direct vector->f32 conversion instructions for
-; elements smaller than i32, so make sure inputs are promoted to i32 first.
-;
-
-define void @uitofp_v4i8_v4f32(<4 x i8>* %in, <4 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v4i8_v4f32:
-; CHECK-COUNT-1: ucvt
-  %vec = load <4 x i8>, <4 x i8>* %in
-  %conv = uitofp <4 x i8> %vec to <4 x float>
-  store <4 x float> %conv, <4 x float>* %out
-  ret void
-}
-
-define void @uitofp_v8i8_v8f32(<8 x i8>* %in, <8 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v8i8_v8f32:
-; CHECK-COUNT-8: ucvt
-  %vec = load <8 x i8>, <8 x i8>* %in
-  %conv = uitofp <8 x i8> %vec to <8 x float>
-  store <8 x float> %conv, <8 x float>* %out
-  ret void
-}
-
-define void @uitofp_v16i8_v16f32(<16 x i8>* %in, <16 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v16i8_v16f32:
-; CHECK-COUNT-16: ucvt
-  %vec = load <16 x i8>, <16 x i8>* %in
-  %conv = uitofp <16 x i8> %vec to <16 x float>
-  store <16 x float> %conv, <16 x float>* %out
-  ret void
-}
-
-define void @uitofp_v32i8_v32f32(<32 x i8>* %in, <32 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v32i8_v32f32:
-; CHECK-COUNT-32: ucvt
-  %vec = load <32 x i8>, <32 x i8>* %in
-  %conv = uitofp <32 x i8> %vec to <32 x float>
-  store <32 x float> %conv, <32 x float>* %out
-  ret void
-}
-
-attributes #0 = { nounwind "target-features"="+sve" }
--- a/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
--- a/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll