
[RISCV] Add support for RVV int<->fp & fp<->fp conversions

This patch adds support for the full range of vector int-to-float,
float-to-int, and float-to-float conversions on legal types.

Many conversions are supported natively in RVV and so are lowered with
patterns. These include conversions between (element) types of the same
size, and those where one type is half or double the size of the other.
When a conversion is between types that are less than half or more than
double the size of each other, it must be lowered as a sequence of
instructions which goes via intermediate types.
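
For illustration, a minimal sketch (hypothetical function names, in the style
of the tests added below) of two conversions whose element sizes are more than
one power of two apart and therefore need an intermediate step:

; i8 -> f64 is lowered as a sign-extend to i64 followed by a native
; same-width integer-to-float conversion.
define <vscale x 1 x double> @example_sitofp_nxv1i8_nxv1f64(<vscale x 1 x i8> %va) {
  %evec = sitofp <vscale x 1 x i8> %va to <vscale x 1 x double>
  ret <vscale x 1 x double> %evec
}

; f64 -> i8 is lowered as a narrowing float-to-int conversion to i32 followed
; by a series of integer vector truncates.
define <vscale x 1 x i8> @example_fptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
  %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %evec
}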

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D95447
Fraser Cormack 2021-01-22 14:54:00 +00:00
parent a688c9d8f1
commit 100d58ed00
7 changed files with 4775 additions and 0 deletions


@@ -402,6 +402,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -427,9 +436,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
};
@@ -771,6 +787,99 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
case ISD::FP_EXTEND: {
// RVV can only do fp_extend to types double the size of the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
// via f32.
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
// We only need to close the gap between vXf16->vXf64.
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
SrcVT.getVectorElementType() != MVT::f16)
return Op;
SDLoc DL(Op);
MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue IntermediateRound =
DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
}
case ISD::FP_ROUND: {
// RVV can only do fp_round to types half the size of the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction followed by a regular f32->f16 round; rounding
// to odd in the first step avoids double-rounding errors.
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
// We only need to close the gap between vXf64<->vXf16.
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
SrcVT.getVectorElementType() != MVT::f64)
return Op;
SDLoc DL(Op);
MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue IntermediateRound =
DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {
// RVV can only do fp<->int conversions to types half/double the size of
// the source. Conversions whose element sizes are further apart than
// that are custom-lowered as two-step sequences.
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return Op;
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
MVT EltVT = VT.getVectorElementType();
MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned SrcEltSize = SrcEltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
"Unexpected vector element types");
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND;
SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
}
// FP2Int
assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
// Do one doubling fp_extend then complete the operation by converting
// to int.
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
}
// Narrowing conversions
if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
return DAG.getFPExtendOrRound(Int2FP, DL, VT);
}
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float isn't
// representable by the integer, the result is poison.
MVT IVecVT =
MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
return Op;
}
}
}
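
As a minimal sketch of the staged lowerings above (hypothetical function names,
written in the same style as the tests added by this patch), the f32 interim
type comes into play for conversions such as:

; i64 -> f16 is lowered as a narrowing int-to-float conversion to f32
; followed by an f32 -> f16 fp_round.
define <vscale x 1 x half> @example_sitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) {
  %evec = sitofp <vscale x 1 x i64> %va to <vscale x 1 x half>
  ret <vscale x 1 x half> %evec
}

; f16 -> i64 is lowered as an f16 -> f32 fp_extend followed by a widening
; float-to-int conversion.
define <vscale x 1 x i64> @example_fptosi_nxv1f16_nxv1i64(<vscale x 1 x half> %va) {
  %evec = fptosi <vscale x 1 x half> %va to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %evec
}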
@@ -4012,6 +4121,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSLIDEUP)
NODE_NAME_CASE(VSLIDEDOWN)
NODE_NAME_CASE(VID)
NODE_NAME_CASE(VFNCVT_ROD)
}
// clang-format on
return nullptr;


@@ -106,6 +106,10 @@ enum NodeType : unsigned {
VSLIDEDOWN,
// Matches the semantics of the unmasked vid.v instruction.
VID,
// Matches the semantics of the vfncvt.rod instruction (Convert double-width
// float to single-width float, rounding towards odd). Takes a double-width
// float vector and produces a single-width float vector.
VFNCVT_ROD,
};
} // namespace RISCVISD


@@ -291,6 +291,64 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
}
}
multiclass VPatConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
ivti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
fvti.RegClass:$rs1, ivti.AVL, ivti.SEW)>;
}
}
multiclass VPatWConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar ivti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
ivti.RegClass:$rs1, ivti.AVL, ivti.SEW)>;
}
}
multiclass VPatWConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatNConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
iwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
}
multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
fwti.RegClass:$rs1, vti.AVL, vti.SEW)>;
}
}
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -440,6 +498,10 @@ foreach mti = AllMasks in {
} // Predicates = [HasStdExtV]
def riscv_fncvt_rod
: SDNode<"RISCVISD::VFNCVT_ROD",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
let Predicates = [HasStdExtV, HasStdExtF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
@@ -489,6 +551,43 @@ foreach fvti = AllFloatVectors in {
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.SEW)>;
}
// 14.15. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm "" : VPatConvertFP2ISDNode_V<fp_to_sint, "PseudoVFCVT_RTZ_X_F_V">;
defm "" : VPatConvertFP2ISDNode_V<fp_to_uint, "PseudoVFCVT_RTZ_XU_F_V">;
defm "" : VPatConvertI2FPSDNode_V<sint_to_fp, "PseudoVFCVT_F_X_V">;
defm "" : VPatConvertI2FPSDNode_V<uint_to_fp, "PseudoVFCVT_F_XU_V">;
// 14.16. Widening Floating-Point/Integer Type-Convert Instructions
defm "" : VPatWConvertFP2ISDNode_V<fp_to_sint, "PseudoVFWCVT_RTZ_X_F_V">;
defm "" : VPatWConvertFP2ISDNode_V<fp_to_uint, "PseudoVFWCVT_RTZ_XU_F_V">;
defm "" : VPatWConvertI2FPSDNode_V<sint_to_fp, "PseudoVFWCVT_F_X_V">;
defm "" : VPatWConvertI2FPSDNode_V<uint_to_fp, "PseudoVFWCVT_F_XU_V">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fwti.Vector (fpextend (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
// 14.17. Narrowing Floating-Point/Integer Type-Convert Instructions
defm "" : VPatNConvertFP2ISDNode_V<fp_to_sint, "PseudoVFNCVT_RTZ_X_F_W">;
defm "" : VPatNConvertFP2ISDNode_V<fp_to_uint, "PseudoVFNCVT_RTZ_XU_F_W">;
defm "" : VPatNConvertI2FPSDNode_V<sint_to_fp, "PseudoVFNCVT_F_X_W">;
defm "" : VPatNConvertI2FPSDNode_V<uint_to_fp, "PseudoVFNCVT_F_XU_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
def : Pat<(fvti.Vector (riscv_fncvt_rod (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//

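The patterns above select the natively-supported conversions directly. As a
quick sanity sketch (hypothetical function names, not part of this patch), IR
like the following should now match the single-width and widening patterns
without any custom expansion, selecting vfcvt.rtz.x.f.v and vfwcvt.f.x.v
respectively:

; Same-width conversion: f32 -> i32.
define <vscale x 2 x i32> @example_fptosi_nxv2f32_nxv2i32(<vscale x 2 x float> %va) {
  %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %evec
}

; Widening conversion: i16 -> f32.
define <vscale x 2 x float> @example_sitofp_nxv2i16_nxv2f32(<vscale x 2 x i16> %va) {
  %evec = sitofp <vscale x 2 x i16> %va to <vscale x 2 x float>
  ret <vscale x 2 x float> %evec
}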

@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
;
; RV32-LABEL: vfpext_nxv1f16_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f16_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x half> %va to <vscale x 1 x float>
ret <vscale x 1 x float> %evec
}
define <vscale x 1 x double> @vfpext_nxv1f16_nxv1f64(<vscale x 1 x half> %va) {
;
; RV32-LABEL: vfpext_nxv1f16_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f16_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x half> %va to <vscale x 1 x double>
ret <vscale x 1 x double> %evec
}
define <vscale x 2 x float> @vfpext_nxv2f16_nxv2f32(<vscale x 2 x half> %va) {
;
; RV32-LABEL: vfpext_nxv2f16_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f16_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x half> %va to <vscale x 2 x float>
ret <vscale x 2 x float> %evec
}
define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %va) {
;
; RV32-LABEL: vfpext_nxv2f16_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f16_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x half> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
}
define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) {
;
; RV32-LABEL: vfpext_nxv4f16_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f16_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x half> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
}
define <vscale x 4 x double> @vfpext_nxv4f16_nxv4f64(<vscale x 4 x half> %va) {
;
; RV32-LABEL: vfpext_nxv4f16_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f16_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x half> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
}
define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) {
;
; RV32-LABEL: vfpext_nxv8f16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x half> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
}
define <vscale x 8 x double> @vfpext_nxv8f16_nxv8f64(<vscale x 8 x half> %va) {
;
; RV32-LABEL: vfpext_nxv8f16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x half> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}
define <vscale x 16 x float> @vfpext_nxv16f16_nxv16f32(<vscale x 16 x half> %va) {
;
; RV32-LABEL: vfpext_nxv16f16_nxv16f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v16, v8
; RV32-NEXT: vmv8r.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv16f16_nxv16f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v16, v8
; RV64-NEXT: vmv8r.v v8, v16
; RV64-NEXT: ret
%evec = fpext <vscale x 16 x half> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
}
define <vscale x 1 x double> @vfpext_nxv1f32_nxv1f64(<vscale x 1 x float> %va) {
;
; RV32-LABEL: vfpext_nxv1f32_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv1f32_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fpext <vscale x 1 x float> %va to <vscale x 1 x double>
ret <vscale x 1 x double> %evec
}
define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) {
;
; RV32-LABEL: vfpext_nxv2f32_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv2f32_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
}
define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) {
;
; RV32-LABEL: vfpext_nxv4f32_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv4f32_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
}
define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
;
; RV32-LABEL: vfpext_nxv8f32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfwcvt.f.f.v v16, v8
; RV32-NEXT: vmv8r.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vfpext_nxv8f32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfwcvt.f.f.v v16, v8
; RV64-NEXT: vmv8r.v v8, v16
; RV64-NEXT: ret
%evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}

File diff suppressed because it is too large


@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f32_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f32_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x float> %va to <vscale x 1 x half>
ret <vscale x 1 x half> %evec
}
define <vscale x 2 x half> @vfptrunc_nxv2f32_nxv2f16(<vscale x 2 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f32_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f32_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x half>
ret <vscale x 2 x half> %evec
}
define <vscale x 4 x half> @vfptrunc_nxv4f32_nxv4f16(<vscale x 4 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f32_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f32_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x float> %va to <vscale x 4 x half>
ret <vscale x 4 x half> %evec
}
define <vscale x 8 x half> @vfptrunc_nxv8f32_nxv8f16(<vscale x 8 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f32_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f32_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x float> %va to <vscale x 8 x half>
ret <vscale x 8 x half> %evec
}
define <vscale x 16 x half> @vfptrunc_nxv16f32_nxv16f16(<vscale x 16 x float> %va) {
;
; RV32-LABEL: vfptrunc_nxv16f32_nxv16f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv16f32_nxv16f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 16 x float> %va to <vscale x 16 x half>
ret <vscale x 16 x half> %evec
}
define <vscale x 1 x half> @vfptrunc_nxv1f64_nxv1f16(<vscale x 1 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f64_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v25, v8
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f64_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v25, v8
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x half>
ret <vscale x 1 x half> %evec
}
define <vscale x 1 x float> @vfptrunc_nxv1f64_nxv1f32(<vscale x 1 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv1f64_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv1f64_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x float>
ret <vscale x 1 x float> %evec
}
define <vscale x 2 x half> @vfptrunc_nxv2f64_nxv2f16(<vscale x 2 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f64_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v25, v8
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f64_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v25, v8
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x half>
ret <vscale x 2 x half> %evec
}
define <vscale x 2 x float> @vfptrunc_nxv2f64_nxv2f32(<vscale x 2 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv2f64_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v25, v8
; RV32-NEXT: vmv1r.v v8, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv2f64_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v25, v8
; RV64-NEXT: vmv1r.v v8, v25
; RV64-NEXT: ret
%evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x float>
ret <vscale x 2 x float> %evec
}
define <vscale x 4 x half> @vfptrunc_nxv4f64_nxv4f16(<vscale x 4 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f64_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v26, v8
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f64_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v26, v8
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x half>
ret <vscale x 4 x half> %evec
}
define <vscale x 4 x float> @vfptrunc_nxv4f64_nxv4f32(<vscale x 4 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv4f64_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v26, v8
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv4f64_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v26, v8
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
%evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
}
define <vscale x 8 x half> @vfptrunc_nxv8f64_nxv8f16(<vscale x 8 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f64_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfncvt.rod.f.f.w v28, v8
; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV32-NEXT: vfncvt.f.f.w v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f64_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfncvt.rod.f.f.w v28, v8
; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; RV64-NEXT: vfncvt.f.f.w v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x half>
ret <vscale x 8 x half> %evec
}
define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va) {
;
; RV32-LABEL: vfptrunc_nxv8f64_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV32-NEXT: vfncvt.f.f.w v28, v8
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vfptrunc_nxv8f64_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; RV64-NEXT: vfncvt.f.f.w v28, v8
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
}

File diff suppressed because it is too large