
[RISCV] Add support for RVV int<->fp & fp<->fp conversions

This patch adds support for the full range of vector int-to-float,
float-to-int, and float-to-float conversions on legal types.

Many conversions are supported natively in RVV and so are lowered with
patterns. These include conversions between (element) types of the same
size, and those where one type is half or double the size of the other.
When a conversion is between types that are less than half or more than
double the size of each other, it must be lowered as a sequence of
instructions which goes via intermediate types.
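
For illustration, a minimal sketch (hypothetical function names, in the style
of the tests added below) of two conversions whose element sizes are more than
one power of two apart and therefore need an intermediate step:

; i8 -> f64 is lowered as a sign-extend to i64 followed by a native
; same-width integer-to-float conversion.
define <vscale x 1 x double> @example_sitofp_nxv1i8_nxv1f64(<vscale x 1 x i8> %va) {
  %evec = sitofp <vscale x 1 x i8> %va to <vscale x 1 x double>
  ret <vscale x 1 x double> %evec
}

; f64 -> i8 is lowered as a narrowing float-to-int conversion to i32 followed
; by a series of integer vector truncates.
define <vscale x 1 x i8> @example_fptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
  %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %evec
}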

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D95447
Fraser Cormack 2021-01-22 14:54:00 +00:00
parent a688c9d8f1
commit 100d58ed00
7 changed files with 4775 additions and 0 deletions


@@ -402,6 +402,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -427,9 +436,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
};
@@ -771,6 +787,99 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
case ISD::FP_EXTEND: {
// RVV can only do fp_extend to types double the size of the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
// via f32.
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
// We only need to close the gap between vXf16->vXf64.
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
SrcVT.getVectorElementType() != MVT::f16)
return Op;
SDLoc DL(Op);
MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue IntermediateRound =
DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
}
case ISD::FP_ROUND: {
// RVV can only do fp_round to types half the size of the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction followed by a regular f32->f16 round; rounding
// to odd in the first step avoids double-rounding errors.
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
// We only need to close the gap between vXf64<->vXf16.
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
SrcVT.getVectorElementType() != MVT::f64)
return Op;
SDLoc DL(Op);
MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue IntermediateRound =
DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {
// RVV can only do fp<->int conversions to types half/double the size of
// the source. Conversions whose element sizes are further apart than
// that are custom-lowered as two-step sequences.
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return Op;
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
MVT EltVT = VT.getVectorElementType();
MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned SrcEltSize = SrcEltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
"Unexpected vector element types");
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND;
SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
}
// FP2Int
assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
// Do one doubling fp_extend then complete the operation by converting
// to int.
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
}
// Narrowing conversions
if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
return DAG.getFPExtendOrRound(Int2FP, DL, VT);
}
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float isn't
// representable by the integer, the result is poison.
MVT IVecVT =
MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
return Op;
}
}
}
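
As a minimal sketch of the staged lowerings above (hypothetical function names,
written in the same style as the tests added by this patch), the f32 interim
type comes into play for conversions such as:

; i64 -> f16 is lowered as a narrowing int-to-float conversion to f32
; followed by an f32 -> f16 fp_round.
define <vscale x 1 x half> @example_sitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) {
  %evec = sitofp <vscale x 1 x i64> %va to <vscale x 1 x half>
  ret <vscale x 1 x half> %evec
}

; f16 -> i64 is lowered as an f16 -> f32 fp_extend followed by a widening
; float-to-int conversion.
define <vscale x 1 x i64> @example_fptosi_nxv1f16_nxv1i64(<vscale x 1 x half> %va) {
  %evec = fptosi <vscale x 1 x half> %va to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %evec
}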
@@ -4012,6 +4121,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSLIDEUP)
NODE_NAME_CASE(VSLIDEDOWN)
NODE_NAME_CASE(VID)
NODE_NAME_CASE(VFNCVT_ROD)
}
// clang-format on
return nullptr;


@@ -106,6 +106,10 @@ enum NodeType : unsigned {
VSLIDEDOWN,
// Matches the semantics of the unmasked vid.v instruction.
VID,
// Matches the semantics of the vfncvt.rod instruction (Convert double-width
// float to single-width float, rounding towards odd). Takes a double-width
// float vector and produces a single-width float vector.
VFNCVT_ROD,
};
} // namespace RISCVISD


@@ -291,6 +291,64 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
}
}
multiclass VPatConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
ivti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
fvti.RegClass:$rs1, ivti.AVL, ivti.SEW)>;
}
}
multiclass VPatWConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar ivti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
ivti.RegClass:$rs1, ivti.AVL, ivti.SEW)>;
}
}
multiclass VPatWConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatNConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
iwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
fwti.RegClass:$rs1, vti.AVL, vti.SEW)>;
}
}
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -440,6 +498,10 @@ foreach mti = AllMasks in {
} // Predicates = [HasStdExtV]
def riscv_fncvt_rod
: SDNode<"RISCVISD::VFNCVT_ROD",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
let Predicates = [HasStdExtV, HasStdExtF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
@@ -489,6 +551,43 @@ foreach fvti = AllFloatVectors in {
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.SEW)>;
}
// 14.15. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm "" : VPatConvertFP2ISDNode_V<fp_to_sint, "PseudoVFCVT_RTZ_X_F_V">;
defm "" : VPatConvertFP2ISDNode_V<fp_to_uint, "PseudoVFCVT_RTZ_XU_F_V">;
defm "" : VPatConvertI2FPSDNode_V<sint_to_fp, "PseudoVFCVT_F_X_V">;
defm "" : VPatConvertI2FPSDNode_V<uint_to_fp, "PseudoVFCVT_F_XU_V">;
// 14.16. Widening Floating-Point/Integer Type-Convert Instructions
defm "" : VPatWConvertFP2ISDNode_V<fp_to_sint, "PseudoVFWCVT_RTZ_X_F_V">;
defm "" : VPatWConvertFP2ISDNode_V<fp_to_uint, "PseudoVFWCVT_RTZ_XU_F_V">;
defm "" : VPatWConvertI2FPSDNode_V<sint_to_fp, "PseudoVFWCVT_F_X_V">;
defm "" : VPatWConvertI2FPSDNode_V<uint_to_fp, "PseudoVFWCVT_F_XU_V">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fwti.Vector (fpextend (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
// 14.17. Narrowing Floating-Point/Integer Type-Convert Instructions
defm "" : VPatNConvertFP2ISDNode_V<fp_to_sint, "PseudoVFNCVT_RTZ_X_F_W">;
defm "" : VPatNConvertFP2ISDNode_V<fp_to_uint, "PseudoVFNCVT_RTZ_XU_F_W">;
defm "" : VPatNConvertI2FPSDNode_V<sint_to_fp, "PseudoVFNCVT_F_X_W">;
defm "" : VPatNConvertI2FPSDNode_V<uint_to_fp, "PseudoVFNCVT_F_XU_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
def : Pat<(fvti.Vector (riscv_fncvt_rod (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//

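The patterns above select the natively-supported conversions directly. As a
quick sanity sketch (hypothetical function names, not part of this patch), IR
like the following should now match the single-width and widening patterns
without any custom expansion, selecting vfcvt.rtz.x.f.v and vfwcvt.f.x.v
respectively:

; Same-width conversion: f32 -> i32.
define <vscale x 2 x i32> @example_fptosi_nxv2f32_nxv2i32(<vscale x 2 x float> %va) {
  %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %evec
}

; Widening conversion: i16 -> f32.
define <vscale x 2 x float> @example_sitofp_nxv2i16_nxv2f32(<vscale x 2 x i16> %va) {
  %evec = sitofp <vscale x 2 x i16> %va to <vscale x 2 x float>
  ret <vscale x 2 x float> %evec
}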

@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
;
; RV32-LABEL: vfpext_nxv1f16_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f16_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x half> %va to <vscale x 1 x float>
ret <vscale x 1 x float> %evec
}
define <vscale x 1 x double> @vfpext_nxv1f16_nxv1f64(<vscale x 1 x half> %va) {
;
; RV32-LABEL: vfpext_nxv1f16_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f16_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x half> %va to <vscale x 1 x double>
ret <vscale x 1 x double> %evec
}
define <vscale x 2 x float> @vfpext_nxv2f16_nxv2f32(<vscale x 2 x half> %va) {
;
; RV32-LABEL: vfpext_nxv2f16_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f16_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x half> %va to <vscale x 2 x float>
ret <vscale x 2 x float> %evec
}
define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %va) {
;
; RV32-LABEL: vfpext_nxv2f16_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f16_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x half> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
}
define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) {
;
; RV32-LABEL: vfpext_nxv4f16_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f16_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x half> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
}
define <vscale x 4 x double> @vfpext_nxv4f16_nxv4f64(<vscale x 4 x half> %va) {
;
; RV32-LABEL: vfpext_nxv4f16_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f16_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x half> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
}
define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) {
;
; RV32-LABEL: vfpext_nxv8f16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x half> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
}
define <vscale x 8 x double> @vfpext_nxv8f16_nxv8f64(<vscale x 8 x half> %va) {
;
; RV32-LABEL: vfpext_nxv8f16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x half> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}
define <vscale x 16 x float> @vfpext_nxv16f16_nxv16f32(<vscale x 16 x half> %va) {
;
; RV32-LABEL: vfpext_nxv16f16_nxv16f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v16, v8
; RV32-NEXT: vmv8r.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv16f16_nxv16f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v16, v8
; RV64-NEXT: vmv8r.v v8, v16
; RV64-NEXT: ret
%evec = fpext <vscale x 16 x half> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
}
define <vscale x 1 x double> @vfpext_nxv1f32_nxv1f64(<vscale x 1 x float> %va) {
;
; RV32-LABEL: vfpext_nxv1f32_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f32_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x float> %va to <vscale x 1 x double>
ret <vscale x 1 x double> %evec
}
define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) {
;
; RV32-LABEL: vfpext_nxv2f32_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f32_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
}
define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) {
;
; RV32-LABEL: vfpext_nxv4f32_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f32_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
}
define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
;
; RV32-LABEL: vfpext_nxv8f32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v16, v8
; RV32-NEXT: vmv8r.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v16, v8
; RV64-NEXT: vmv8r.v v8, v16
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}

File diff suppressed because it is too large


@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f32_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f32_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x float> %va to <vscale x 1 x half>
ret <vscale x 1 x half> %evec
}
define <vscale x 2 x half> @vfptrunc_nxv2f32_nxv2f16(<vscale x 2 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f32_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f32_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x half>
ret <vscale x 2 x half> %evec
}
define <vscale x 4 x half> @vfptrunc_nxv4f32_nxv4f16(<vscale x 4 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f32_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f32_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x float> %va to <vscale x 4 x half>
ret <vscale x 4 x half> %evec
}
define <vscale x 8 x half> @vfptrunc_nxv8f32_nxv8f16(<vscale x 8 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f32_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f32_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x float> %va to <vscale x 8 x half>
ret <vscale x 8 x half> %evec
}
define <vscale x 16 x half> @vfptrunc_nxv16f32_nxv16f16(<vscale x 16 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv16f32_nxv16f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv16f32_nxv16f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 16 x float> %va to <vscale x 16 x half>
ret <vscale x 16 x half> %evec
}
define <vscale x 1 x half> @vfptrunc_nxv1f64_nxv1f16(<vscale x 1 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f64_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v25, v8
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f64_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v25, v8
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x half>
ret <vscale x 1 x half> %evec
}
define <vscale x 1 x float> @vfptrunc_nxv1f64_nxv1f32(<vscale x 1 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f64_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f64_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x float>
ret <vscale x 1 x float> %evec
}
define <vscale x 2 x half> @vfptrunc_nxv2f64_nxv2f16(<vscale x 2 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f64_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v25, v8
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f64_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v25, v8
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x half>
ret <vscale x 2 x half> %evec
}
define <vscale x 2 x float> @vfptrunc_nxv2f64_nxv2f32(<vscale x 2 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f64_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f64_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x float>
ret <vscale x 2 x float> %evec
}
define <vscale x 4 x half> @vfptrunc_nxv4f64_nxv4f16(<vscale x 4 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f64_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v26, v8
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f64_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v26, v8
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x half>
ret <vscale x 4 x half> %evec
}
define <vscale x 4 x float> @vfptrunc_nxv4f64_nxv4f32(<vscale x 4 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f64_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f64_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
}
define <vscale x 8 x half> @vfptrunc_nxv8f64_nxv8f16(<vscale x 8 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f64_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v28, v8
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f64_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v28, v8
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x half>
ret <vscale x 8 x half> %evec
}
define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f64_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f64_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
}

File diff suppressed because it is too large