[PowerPC] Support constrained vector fp/int conversion

This patch makes these operations legal and adds the necessary codegen patterns. There is still an issue, similar to D77033, with conversions from the v1i128 type, but the tests for normal types synced in vector-constrained-fp-intrinsics.ll pass successfully. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D83654
2025-01-31 12:41:49 +01:00 · 2020-08-24 10:10:27 +08:00 · 2020-08-24 10:10:27 +08:00 · 6cd03c3d8a
commit 6cd03c3d8a
parent 6072801a3e
4 changed files with 2738 additions and 277 deletions
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -824,6 +824,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
@ -1002,6 +1006,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
@ -1010,6 +1018,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
@ -8346,17 +8362,19 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {

 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
-
+  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
-  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
+          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

-  bool SignedConv = Opc == ISD::SINT_TO_FP;
+  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

-  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
@ -8381,7 +8399,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
-    EVT ExtVT = Op.getOperand(0).getValueType();
+    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());
@ -8391,6 +8409,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

+  if (IsStrict)
+    return DAG.getNode(Opc, dl, {Op.getValueType(), MVT::Other},
+                       {Op.getOperand(0), Extend});
+
  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
 }

@ -10648,6 +10670,28 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  }
 }

+void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
+                                              SmallVectorImpl<SDValue> &Results,
+                                              SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+  if (!Res.getNode())
+    return;
+
+  // If N produces a single value, forward the lowered value unchanged.
+  if (N->getNumValues() == 1) {
+    Results.push_back(Res);
+    return;
+  }
+
+  // Otherwise the replacement must produce exactly as many values as N does.
+  assert((N->getNumValues() == Res->getNumValues()) &&
+      "Lowering returned the wrong number of results!");
+
+  for (unsigned i = 0; i < N->getNumValues(); ++i)
+    Results.push_back(Res.getValue(i));
+}
+
 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@ -753,6 +753,12 @@ namespace llvm {
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

+    /// LowerOperationWrapper - Place the new result values produced by
+    /// custom lowering of node N into Results.
+    void LowerOperationWrapper(SDNode *N,
+                               SmallVectorImpl<SDValue> &Results,
+                               SelectionDAG &DAG) const override;
+
    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -821,7 +821,7 @@ let hasSideEffects = 0 in {
  def XVCVDPSXDS : XX2Form<60, 472,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvdpsxds $XT, $XB", IIC_VecFP,
-                      [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+                      [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>;
  def XVCVDPSXWS : XX2Form<60, 216,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvdpsxws $XT, $XB", IIC_VecFP,
@ -829,7 +829,7 @@ let hasSideEffects = 0 in {
  def XVCVDPUXDS : XX2Form<60, 456,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvdpuxds $XT, $XB", IIC_VecFP,
-                      [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+                      [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>;
  def XVCVDPUXWS : XX2Form<60, 200,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvdpuxws $XT, $XB", IIC_VecFP,
@ -845,18 +845,18 @@ let hasSideEffects = 0 in {
  def XVCVSPSXWS : XX2Form<60, 152,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvspsxws $XT, $XB", IIC_VecFP,
-                      [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
+                      [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>;
  def XVCVSPUXDS : XX2Form<60, 392,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
  def XVCVSPUXWS : XX2Form<60, 136,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvspuxws $XT, $XB", IIC_VecFP,
-                      [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
+                      [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>;
  def XVCVSXDDP : XX2Form<60, 504,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvsxddp $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+                      [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>;
  def XVCVSXDSP : XX2Form<60, 440,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvsxdsp $XT, $XB", IIC_VecFP,
@ -868,11 +868,11 @@ let hasSideEffects = 0 in {
  def XVCVSXWSP : XX2Form<60, 184,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvsxwsp $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>;
+                      [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>;
  def XVCVUXDDP : XX2Form<60, 488,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvuxddp $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+                      [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>;
  def XVCVUXDSP : XX2Form<60, 424,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvuxdsp $XT, $XB", IIC_VecFP,
@ -884,7 +884,7 @@ let hasSideEffects = 0 in {
  def XVCVUXWSP : XX2Form<60, 168,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvcvuxwsp $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
+                      [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>;

  // Rounding Instructions respecting current rounding mode
  def XSRDPIC : XX2Form<60, 107,
--- a/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll