1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00

[RISCV] Add support for splat fixed length build_vectors using RVV.

Building on the fixed vector support from D95705

I've added ISD nodes for vmv.v.x and vfmv.v.f and switched to
lowering the intrinsics to it. This allows us to share the same
isel patterns for both.

This doesn't handle splats of i64 on RV32 yet. The build_vector
gets converted to a vXi32 build_vector+bitcast during type
legalization. Not sure the best way to handle this at the moment.

Differential Revision: https://reviews.llvm.org/D96108
This commit is contained in:
Craig Topper 2021-02-08 10:59:38 -08:00
parent 7567c19539
commit ef4a878d63
6 changed files with 1240 additions and 58 deletions

View File

@@ -523,6 +523,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -551,6 +553,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
@@ -758,6 +762,81 @@ static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
}
}
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // Only legal fixed-length vector types may be mapped onto an RVV container.
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");
  // The subtarget chooses how many vector registers (LMUL) this fixed-length
  // type is allowed to occupy; it must be a power of two no larger than 8.
  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
  // Select the scalable container with the same element type. The per-LMUL
  // element counts (8 x i8, 4 x i16/f16, 2 x i32/f32, 1 x i64/f64) presumably
  // reflect a 64-bit vscale granule -- confirm against the backend's RVV
  // register type mapping.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    // Element types without an RVV container (e.g. i1 here) are a lowering bug.
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i8:
    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
  case MVT::i16:
    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
  case MVT::i32:
    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
  case MVT::i64:
    return MVT::getScalableVectorVT(MVT::i64, LMul);
  case MVT::f16:
    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
  case MVT::f32:
    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
  case MVT::f64:
    return MVT::getScalableVectorVT(MVT::f64, LMul);
  }
}
// Widen the fixed-length vector V into the scalable container type VT by
// inserting it at element index 0 of an undef scalable vector, so it occupies
// an entire RVV register group.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  SDLoc DL(V);
  // The insertion index is always 0 and is materialized as an XLenVT constant.
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
// Narrow the scalable vector V back to the fixed-length type VT by extracting
// the low VT-sized subvector (element index 0).
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  SDLoc DL(V);
  // The extraction index is always 0, materialized as an XLenVT constant.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
// Lower a fixed-length BUILD_VECTOR. Only splat build_vectors are handled:
// they are emitted as a VMV_V_X_VL / VFMV_V_F_VL on the scalable container
// type (with VL equal to the fixed element count) and then narrowed back to
// the fixed-length type. Non-splat build_vectors return SDValue() so the
// default expansion applies.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isFixedLengthVector() && "Unexpected vector!");

  SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue();
  if (!Splat)
    return SDValue();

  SDLoc DL(Op);
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  // VL is the number of fixed-length elements actually being produced.
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
  // Integer splats use vmv.v.x; floating-point splats use vfmv.v.f.
  unsigned Opc =
      VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1005,6 +1084,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
return lowerFPVECREDUCE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::LOAD:
return lowerFixedLengthVectorLoadToRVV(Op, DAG);
case ISD::STORE:
@@ -1704,6 +1785,15 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::riscv_vmv_v_x: {
SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
Op.getOperand(1));
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
Scalar, Op.getOperand(2));
}
case Intrinsic::riscv_vfmv_v_f:
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
}
@@ -1861,60 +1951,6 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
// NOTE(review): this span is the removal side of the diff -- the same helper
// now lives earlier in the file; kept byte-identical here.
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");
  // LMUL from the subtarget: power of two, at most 8.
  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
  // Same-element-type scalable container; counts scale with LMUL.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i8:
    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
  case MVT::i16:
    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
  case MVT::i32:
    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
  case MVT::i64:
    return MVT::getScalableVectorVT(MVT::i64, LMul);
  case MVT::f16:
    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
  case MVT::f32:
    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
  case MVT::f64:
    return MVT::getScalableVectorVT(MVT::f64, LMul);
  }
}
// NOTE(review): removal side of the diff (helper moved earlier in the file).
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  // Insert the fixed-length value at index 0 of an undef scalable vector.
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// NOTE(review): removal side of the diff (helper moved earlier in the file).
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  // Extract the low VT-sized subvector (index 0).
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
SelectionDAG &DAG) const {
@@ -4540,6 +4576,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(GREVIW)
NODE_NAME_CASE(GORCI)
NODE_NAME_CASE(GORCIW)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(READ_VLENB)

View File

@@ -89,6 +89,12 @@ enum NodeType : unsigned {
GORCI,
GORCIW,
// Vector Extension
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation.
VMV_V_X_VL,
// VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
// for the VL value to be used for the operation.
VFMV_V_F_VL,
// VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
// extended from the vector element size.
VMV_X_S,

View File

@@ -14,6 +14,15 @@
///
//===----------------------------------------------------------------------===//
// Integer splat with explicit VL: result is an integer vector, operand 1 is
// the XLenVT scalar being splatted, operand 2 is the VL to use.
def riscv_vmv_v_x_vl : SDNode<"RISCVISD::VMV_V_X_VL",
                              SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
                                                   SDTCisVT<1, XLenVT>,
                                                   SDTCisVT<2, XLenVT>]>>;
// FP splat with explicit VL: result is an FP vector, operand 1 is a scalar of
// the result's element type, operand 2 is the VL to use.
def riscv_vfmv_v_f_vl : SDNode<"RISCVISD::VFMV_V_F_VL",
                               SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
                                                    SDTCisEltOfVec<1, 0>,
                                                    SDTCisVT<2, XLenVT>]>>;
// Read element 0 of an integer vector into an integer scalar (vmv.x.s).
def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
                           SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>,
                                                SDTCisInt<1>]>>;
@@ -4048,10 +4057,10 @@ foreach vti = AllVectors in {
}
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
$rs2, GPR:$vl, vti.SEW)>;
def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
def : Pat<(vti.Vector (riscv_vmv_v_x_vl simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
simm5:$imm5, GPR:$vl, vti.SEW)>;
}
@@ -4206,12 +4215,12 @@ foreach fvti = AllFloatVectors in {
//===----------------------------------------------------------------------===//
foreach fvti = AllFloatVectors in {
// If we're splatting fpimm0, use vmv.v.x vd, x0.
def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
(fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
0, GPR:$vl, fvti.SEW)>;
def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
(fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
fvti.LMul.MX)

View File

@@ -0,0 +1,233 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; Splat a scalar half into all 8 lanes (insertelement + zero shufflevector)
; and store; expected lowering is a single vfmv.v.f with vl=8, e16.
define void @splat_v8f16(<8 x half>* %x, half %y) {
; CHECK-LABEL: splat_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x half> undef, half %y, i32 0
%b = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> zeroinitializer
store <8 x half> %b, <8 x half>* %x
ret void
}
define void @splat_v4f32(<4 x float>* %x, float %y) {
; CHECK-LABEL: splat_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x float> undef, float %y, i32 0
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %b, <4 x float>* %x
ret void
}
define void @splat_v2f64(<2 x double>* %x, double %y) {
; CHECK-LABEL: splat_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x double> undef, double %y, i32 0
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %b, <2 x double>* %x
ret void
}
define void @splat_16f16(<16 x half>* %x, half %y) {
; LMULMAX2-LABEL: splat_16f16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vfmv.v.f v26, fa0
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_16f16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vfmv.v.f v25, fa0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x half> undef, half %y, i32 0
%b = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> zeroinitializer
store <16 x half> %b, <16 x half>* %x
ret void
}
define void @splat_v8f32(<8 x float>* %x, float %y) {
; LMULMAX2-LABEL: splat_v8f32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vfmv.v.f v26, fa0
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v8f32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vfmv.v.f v25, fa0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x float> undef, float %y, i32 0
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
store <8 x float> %b, <8 x float>* %x
ret void
}
define void @splat_v4f64(<4 x double>* %x, double %y) {
; LMULMAX2-LABEL: splat_v4f64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 4
; LMULMAX2-NEXT: vsetvli a1, a1, e64,m2,ta,mu
; LMULMAX2-NEXT: vfmv.v.f v26, fa0
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v4f64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 2
; LMULMAX1-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; LMULMAX1-NEXT: vfmv.v.f v25, fa0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x double> undef, double %y, i32 0
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
store <4 x double> %b, <4 x double>* %x
ret void
}
define void @splat_zero_v8f16(<8 x half>* %x) {
; CHECK-LABEL: splat_zero_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x half> undef, half 0.0, i32 0
%b = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> zeroinitializer
store <8 x half> %b, <8 x half>* %x
ret void
}
define void @splat_zero_v4f32(<4 x float>* %x) {
; CHECK-LABEL: splat_zero_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x float> undef, float 0.0, i32 0
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %b, <4 x float>* %x
ret void
}
define void @splat_zero_v2f64(<2 x double>* %x) {
; CHECK-LABEL: splat_zero_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x double> undef, double 0.0, i32 0
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %b, <2 x double>* %x
ret void
}
define void @splat_zero_16f16(<16 x half>* %x) {
; LMULMAX2-LABEL: splat_zero_16f16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_16f16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x half> undef, half 0.0, i32 0
%b = shufflevector <16 x half> %a, <16 x half> undef, <16 x i32> zeroinitializer
store <16 x half> %b, <16 x half>* %x
ret void
}
define void @splat_zero_v8f32(<8 x float>* %x) {
; LMULMAX2-LABEL: splat_zero_v8f32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v8f32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x float> undef, float 0.0, i32 0
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
store <8 x float> %b, <8 x float>* %x
ret void
}
define void @splat_zero_v4f64(<4 x double>* %x) {
; LMULMAX2-LABEL: splat_zero_v4f64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 4
; LMULMAX2-NEXT: vsetvli a1, a1, e64,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v4f64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 2
; LMULMAX1-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x double> undef, double 0.0, i32 0
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
store <4 x double> %b, <4 x double>* %x
ret void
}

View File

@@ -0,0 +1,437 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; Splat a scalar i8 into all 16 lanes (insertelement + zero shufflevector)
; and store; expected lowering is a single vmv.v.x with vl=16, e8.
define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 %y, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
define void @splat_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 %y, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
define void @splat_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 4
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 %y, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
; FIXME: Support i64 splats on riscv32
;define void @splat_v2i64(<2 x i64>* %x, i64 %y) {
; %a = insertelement <2 x i64> undef, i64 %y, i32 0
; %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
; store <2 x i64> %b, <2 x i64>* %x
; ret void
;}
define void @splat_v32i8(<32 x i8>* %x, i8 %y) {
; LMULMAX2-LABEL: splat_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 16
; LMULMAX1-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 %y, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
define void @splat_v16i16(<16 x i16>* %x, i16 %y) {
; LMULMAX2-LABEL: splat_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 16
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 8
; LMULMAX1-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 %y, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
define void @splat_v8i32(<8 x i32>* %x, i32 %y) {
; LMULMAX2-LABEL: splat_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 8
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 4
; LMULMAX1-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 %y, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
; FIXME: Support i64 splats on riscv32
;define void @splat_v4i64(<4 x i64>* %x, i64 %y) {
; %a = insertelement <4 x i64> undef, i64 %y, i32 0
; %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
; store <4 x i64> %b, <4 x i64>* %x
; ret void
;}
define void @splat_zero_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 16
; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 0, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
define void @splat_zero_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 0, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
define void @splat_zero_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 0, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
; i64 zero splat on rv32: the CHECK lines show it emitted as an e32 vmv.v.i
; with vl=4 -- presumably because the i64 build_vector is type-legalized to a
; v4i32 splat on rv32 (see the FIXME above for non-zero i64 splats).
define void @splat_zero_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i64> undef, i64 0, i32 0
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %b, <2 x i64>* %x
ret void
}
define void @splat_zero_v32i8(<32 x i8>* %x) {
; LMULMAX2-LABEL: splat_zero_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 32
; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 16
; LMULMAX1-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 0, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
define void @splat_zero_v16i16(<16 x i16>* %x) {
; LMULMAX2-LABEL: splat_zero_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 0, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
define void @splat_zero_v8i32(<8 x i32>* %x) {
; LMULMAX2-LABEL: splat_zero_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 0, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
define void @splat_zero_v4i64(<4 x i64>* %x) {
; LMULMAX2-LABEL: splat_zero_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x i64> undef, i64 0, i32 0
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %b, <4 x i64>* %x
ret void
}
define void @splat_allones_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 16
; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 -1, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
define void @splat_allones_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 -1, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
define void @splat_allones_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 -1, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
define void @splat_allones_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i64> undef, i64 -1, i32 0
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %b, <2 x i64>* %x
ret void
}
define void @splat_allones_v32i8(<32 x i8>* %x) {
; LMULMAX2-LABEL: splat_allones_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 32
; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 16
; LMULMAX1-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 -1, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
define void @splat_allones_v16i16(<16 x i16>* %x) {
; LMULMAX2-LABEL: splat_allones_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 -1, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
define void @splat_allones_v8i32(<8 x i32>* %x) {
; LMULMAX2-LABEL: splat_allones_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 -1, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
define void @splat_allones_v4i64(<4 x i64>* %x) {
; LMULMAX2-LABEL: splat_allones_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x i64> undef, i64 -1, i32 0
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %b, <4 x i64>* %x
ret void
}

View File

@ -0,0 +1,459 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; Splat the scalar i8 argument %y across <16 x i8> and store to %x.
; A variable splat lowers to vmv.v.x from the GPR holding %y.
define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 %y, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
; Splat the scalar i16 argument %y across <8 x i16> and store to %x
; (vmv.v.x at SEW=16).
define void @splat_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 %y, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
; Splat the scalar i32 argument %y across <4 x i32> and store to %x
; (vmv.v.x at SEW=32).
define void @splat_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 4
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 %y, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
; Splat the scalar i64 argument %y across <2 x i64> and store to %x.
; On riscv64 the i64 fits in one GPR, so a single vmv.v.x suffices.
define void @splat_v2i64(<2 x i64>* %x, i64 %y) {
; CHECK-LABEL: splat_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 2
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i64> undef, i64 %y, i32 0
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %b, <2 x i64>* %x
ret void
}
; Splat i8 %y across a 256-bit <32 x i8> vector. With LMUL<=2 this is one
; vmv.v.x at m2; with LMUL capped at 1 the splat is done once at m1 and
; stored twice (offsets 0 and 16).
define void @splat_v32i8(<32 x i8>* %x, i8 %y) {
; LMULMAX2-LABEL: splat_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 16
; LMULMAX1-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 %y, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
; Splat i16 %y across a 256-bit <16 x i16> vector; one m2 op for LMULMAX2,
; one m1 splat plus two half stores for LMULMAX1.
define void @splat_v16i16(<16 x i16>* %x, i16 %y) {
; LMULMAX2-LABEL: splat_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 16
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 8
; LMULMAX1-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 %y, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
; Splat i32 %y across a 256-bit <8 x i32> vector; one m2 op for LMULMAX2,
; one m1 splat plus two half stores for LMULMAX1.
define void @splat_v8i32(<8 x i32>* %x, i32 %y) {
; LMULMAX2-LABEL: splat_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 8
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 4
; LMULMAX1-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 %y, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
; Splat i64 %y across a 256-bit <4 x i64> vector. On riscv64 the scalar
; fits in one GPR, so both LMUL configurations use a single vmv.v.x.
define void @splat_v4i64(<4 x i64>* %x, i64 %y) {
; LMULMAX2-LABEL: splat_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 4
; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a2, zero, 2
; LMULMAX1-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.x v25, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x i64> undef, i64 %y, i32 0
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %b, <4 x i64>* %x
ret void
}
; Constant-zero splat of <16 x i8>; selects the immediate form vmv.v.i 0
; instead of materializing the scalar in a GPR.
define void @splat_zero_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 16
; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 0, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
; Constant-zero splat of <8 x i16>; expects vmv.v.i 0 at SEW=16.
define void @splat_zero_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 0, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
; Constant-zero splat of <4 x i32>; expects vmv.v.i 0 at SEW=32.
define void @splat_zero_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 0, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
; Constant-zero splat of <2 x i64>; expects vmv.v.i 0 at SEW=64.
define void @splat_zero_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i64> undef, i64 0, i32 0
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %b, <2 x i64>* %x
ret void
}
; Constant-zero splat of a 256-bit <32 x i8>; vmv.v.i 0 at m2 for LMULMAX2,
; or one m1 splat stored to both 128-bit halves for LMULMAX1.
define void @splat_zero_v32i8(<32 x i8>* %x) {
; LMULMAX2-LABEL: splat_zero_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 32
; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 16
; LMULMAX1-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 0, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
; Constant-zero splat of a 256-bit <16 x i16>; m2 vs split-m1 per LMUL cap.
define void @splat_zero_v16i16(<16 x i16>* %x) {
; LMULMAX2-LABEL: splat_zero_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 0, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
; Constant-zero splat of a 256-bit <8 x i32>; m2 vs split-m1 per LMUL cap.
define void @splat_zero_v8i32(<8 x i32>* %x) {
; LMULMAX2-LABEL: splat_zero_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 0, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
; Constant-zero splat of a 256-bit <4 x i64>; m2 vs split-m1 per LMUL cap.
define void @splat_zero_v4i64(<4 x i64>* %x) {
; LMULMAX2-LABEL: splat_zero_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 4
; LMULMAX2-NEXT: vsetvli a1, a1, e64,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 2
; LMULMAX1-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x i64> undef, i64 0, i32 0
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %b, <4 x i64>* %x
ret void
}
; All-ones splat of <16 x i8>; -1 fits the vmv.v.i simm5 immediate.
define void @splat_allones_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 16
; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse8.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i8> undef, i8 -1, i32 0
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %b, <16 x i8>* %x
ret void
}
; All-ones splat of <8 x i16>; expects vmv.v.i -1 at SEW=16.
define void @splat_allones_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i16> undef, i16 -1, i32 0
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %b, <8 x i16>* %x
ret void
}
; All-ones splat of <4 x i32>; expects vmv.v.i -1 at SEW=32.
define void @splat_allones_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i32> undef, i32 -1, i32 0
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %b, <4 x i32>* %x
ret void
}
; All-ones splat of <2 x i64>; expects vmv.v.i -1 at SEW=64 (no GPR needed).
define void @splat_allones_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i64> undef, i64 -1, i32 0
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %b, <2 x i64>* %x
ret void
}
; All-ones splat of a 256-bit <32 x i8>; one m2 vmv.v.i for LMULMAX2, or
; one m1 splat stored to both 128-bit halves for LMULMAX1.
define void @splat_allones_v32i8(<32 x i8>* %x) {
; LMULMAX2-LABEL: splat_allones_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 32
; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 16
; LMULMAX1-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 -1, i32 0
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %b, <32 x i8>* %x
ret void
}
; All-ones splat of a 256-bit <16 x i16>; m2 vs split-m1 per LMUL cap.
define void @splat_allones_v16i16(<16 x i16>* %x) {
; LMULMAX2-LABEL: splat_allones_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 -1, i32 0
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %b, <16 x i16>* %x
ret void
}
; All-ones splat of a 256-bit <8 x i32>; m2 vs split-m1 per LMUL cap.
define void @splat_allones_v8i32(<8 x i32>* %x) {
; LMULMAX2-LABEL: splat_allones_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 -1, i32 0
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %b, <8 x i32>* %x
ret void
}
; All-ones splat of a 256-bit <4 x i64>. On riscv64 this stays at SEW=64
; with the vmv.v.i -1 immediate; m2 vs split-m1 per LMUL cap.
define void @splat_allones_v4i64(<4 x i64>* %x) {
; LMULMAX2-LABEL: splat_allones_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 4
; LMULMAX2-NEXT: vsetvli a1, a1, e64,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.i v26, -1
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 2
; LMULMAX1-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x i64> undef, i64 -1, i32 0
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %b, <4 x i64>* %x
ret void
}