
[RISCV] Support the scalable-vector fadd reduction intrinsic

This patch adds support for the fadd reduction intrinsic, in both its
ordered and unordered modes.

The fmin and fmax intrinsics are not currently supported due to a
discrepancy between the LLVM semantics and the RVV ISA behaviour with
regard to signaling NaNs. This behaviour is likely fixed in version 2.3
of the RISC-V F/D/Q extension, but until then the intrinsics can be left
unsupported.
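
The two modes are distinguished by the reassoc fast-math flag on the
call, as the new tests below illustrate; a minimal IR sketch (the value
names and the nxv2f32 type are illustrative only):

  ; no reassoc: ordered accumulation, lowered to vfredosum.vs
  %ord = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
  ; reassoc: unordered, lowered to vfredsum.vs plus a scalar fadd of %s
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)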

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D95870
Author: Fraser Cormack
Date:   2021-02-02 14:34:51 +00:00
Parent: 36869ae64f
Commit: cfd59be3f3

5 changed files with 525 additions and 13 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -468,6 +468,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
};
if (Subtarget.hasStdExtZfh())
@@ -922,6 +925,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
return lowerFPVECREDUCE(Op, DAG);
}
}
@@ -1698,6 +1704,43 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
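// Note that the unordered form starts the vector reduction from the neutral
// element +0.0: the intrinsic's scalar start value is added back by a
// separate scalar FADD created during SelectionDAG building, as the tests
// below show, whereas the ordered form feeds the start value (operand 0)
// straight into the reduction.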
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
SDLoc DL(Op);
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_FADD:
return {RISCVISD::VECREDUCE_FADD, Op.getOperand(0),
DAG.getConstantFP(0.0, DL, EltVT)};
case ISD::VECREDUCE_SEQ_FADD:
return {RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1), Op.getOperand(0)};
}
}
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecEltVT = Op.getSimpleValueType();
// We have to perform a bit of a dance to get from our vector type to the
// correct LMUL=1 vector type. See above for an explanation.
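// An LMUL=1 register holds 64 bits per vscale unit, so for example an f32
// element type gives NumElts = 64 / 32 = 2 and M1VT = nxv2f32, while f64
// gives nxv1f64.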
unsigned NumElts = 64 / VecEltVT.getSizeInBits();
MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
unsigned RVVOpcode;
SDValue VectorVal, ScalarVal;
std::tie(RVVOpcode, VectorVal, ScalarVal) =
getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -4264,6 +4307,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VECREDUCE_AND)
NODE_NAME_CASE(VECREDUCE_OR)
NODE_NAME_CASE(VECREDUCE_XOR)
NODE_NAME_CASE(VECREDUCE_FADD)
NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
}
// clang-format on
return nullptr;

llvm/lib/Target/RISCV/RISCVISelLowering.h

@@ -131,6 +131,8 @@ enum NodeType : unsigned {
VECREDUCE_AND,
VECREDUCE_OR,
VECREDUCE_XOR,
VECREDUCE_FADD,
VECREDUCE_SEQ_FADD,
};
} // namespace RISCVISD
@@ -333,6 +335,7 @@ private:
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

@@ -49,7 +49,8 @@ def SDTRVVVecReduce : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>
]>;
-foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR"] in
+foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
+                "FADD", "SEQ_FADD"] in
def rvv_vecreduce_#kind : SDNode<"RISCVISD::VECREDUCE_"#kind, SDTRVVVecReduce>;
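// Each iteration defines an SDNode named rvv_vecreduce_<kind>; the two new
// entries give rvv_vecreduce_FADD and rvv_vecreduce_SEQ_FADD, matching the
// RISCVISD opcodes introduced in RISCVISelLowering.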
multiclass VPatUSLoadStoreSDNode<ValueType type,
@@ -362,9 +363,9 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatReductionSDNode<SDNode vop, string instruction_name> {
-foreach vti = AllIntegerVectors in {
-defvar vti_m1 = !cast<VTypeInfo>("VI" # vti.SEW # "M1");
+multiclass VPatReductionSDNode<SDNode vop, string instruction_name, bit is_float> {
+foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
+defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2)),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
(vti_m1.Vector (IMPLICIT_DEF)),
@@ -495,14 +496,18 @@ foreach vti = AllIntegerVectors in {
}
// 15.1. Vector Single-Width Integer Reduction Instructions
defm "" : VPatReductionSDNode<rvv_vecreduce_ADD, "PseudoVREDSUM">;
defm "" : VPatReductionSDNode<rvv_vecreduce_UMAX, "PseudoVREDMAXU">;
defm "" : VPatReductionSDNode<rvv_vecreduce_SMAX, "PseudoVREDMAX">;
defm "" : VPatReductionSDNode<rvv_vecreduce_UMIN, "PseudoVREDMINU">;
defm "" : VPatReductionSDNode<rvv_vecreduce_SMIN, "PseudoVREDMIN">;
defm "" : VPatReductionSDNode<rvv_vecreduce_AND, "PseudoVREDAND">;
defm "" : VPatReductionSDNode<rvv_vecreduce_OR, "PseudoVREDOR">;
defm "" : VPatReductionSDNode<rvv_vecreduce_XOR, "PseudoVREDXOR">;
defm "" : VPatReductionSDNode<rvv_vecreduce_ADD, "PseudoVREDSUM", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_UMAX, "PseudoVREDMAXU", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_SMAX, "PseudoVREDMAX", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_UMIN, "PseudoVREDMINU", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_SMIN, "PseudoVREDMIN", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_AND, "PseudoVREDAND", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_OR, "PseudoVREDOR", /*is_float*/0>;
defm "" : VPatReductionSDNode<rvv_vecreduce_XOR, "PseudoVREDXOR", /*is_float*/0>;
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
defm "" : VPatReductionSDNode<rvv_vecreduce_SEQ_FADD, "PseudoVFREDOSUM", /*is_float*/1>;
defm "" : VPatReductionSDNode<rvv_vecreduce_FADD, "PseudoVFREDSUM", /*is_float*/1>;
// 16.1. Vector Mask-Register Logical Instructions
foreach mti = AllMasks in {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

@@ -102,9 +102,19 @@ bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
switch (II->getIntrinsicID()) {
default:
return false;
// These reductions have no equivalent in RVV
case Intrinsic::vector_reduce_mul:
-case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
+// The fmin and fmax intrinsics are not currently supported due to a
+// discrepancy between the LLVM semantics and the RVV 0.10 ISA behaviour with
+// regard to signaling NaNs: the vector fmin/fmax reduction intrinsics match
+// the behaviour of the minnum/maxnum intrinsics, whereas the vfredmin/vfredmax
+// instructions match the vfmin/vfmax instructions, which in turn match the
+// equivalent scalar fmin/fmax instructions as defined in version 2.2 of the
+// F/D/Q extension (see https://bugs.llvm.org/show_bug.cgi?id=27363).
+// This behaviour is likely fixed in version 2.3 of the RISC-V F/D/Q
+// extension, where fmin/fmax behave like minnum/maxnum, but until then the
+// intrinsics are left unsupported.
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
return true;
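
Returning true here means a call such as the following (a hypothetical
example; any floating-point vector type behaves the same) is still expanded
by the generic reduction-expansion path rather than selecting vfredmin:

  %m = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)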

llvm/test/CodeGen/RISCV/rvv/vreduce-fp-sdnode.ll

@@ -0,0 +1,449 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; RV32-LABEL: vreduce_fadd_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.h fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.h fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
ret half %red
}
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
ret half %red
}
declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; RV32-LABEL: vreduce_fadd_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.h fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.h fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
ret half %red
}
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
ret half %red
}
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; RV32-LABEL: vreduce_fadd_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.h fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.h fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
ret half %red
}
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
ret half %red
}
declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; RV32-LABEL: vreduce_fadd_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.s fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.s fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; RV32-LABEL: vreduce_fadd_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.s fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.s fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
ret float %red
}
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
ret float %red
}
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; RV32-LABEL: vreduce_fadd_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.s fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.s fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
ret float %red
}
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
ret float %red
}
declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; RV32-LABEL: vreduce_fadd_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.d fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.d fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
ret double %red
}
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
ret double %red
}
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; RV32-LABEL: vreduce_fadd_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.d fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.d fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
ret double %red
}
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
ret double %red
}
declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; RV32-LABEL: vreduce_fadd_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.v.i v25, 0
; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; RV32-NEXT: vfredsum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.f.s ft0, v25
; RV32-NEXT: fadd.d fa0, fa0, ft0
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_fadd_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.i v25, 0
; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; RV64-NEXT: vfredsum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.f.s ft0, v25
; RV64-NEXT: fadd.d fa0, fa0, ft0
; RV64-NEXT: ret
%red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
ret double %red
}
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; RV32-LABEL: vreduce_ord_fadd_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.v.f v25, fa0
; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; RV32-NEXT: vfredosum.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vfmv.f.s fa0, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_ord_fadd_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.v.f v25, fa0
; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; RV64-NEXT: vfredosum.vs v25, v8, v25
; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV64-NEXT: vfmv.f.s fa0, v25
; RV64-NEXT: ret
%red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
ret double %red
}