
[RISCV] Support EXTRACT_SUBVECTOR on vector masks

This patch adds support for extracting subvectors from vector masks.
The extraction can be of a scalable vector from another scalable vector,
or of a fixed-length vector from either a fixed-length or a scalable
vector.

Since RVV lacks a way to slide vector masks down on an element-wise
basis and we don't know the true length of the vector registers, in many
cases we must resort to using equivalently-sized i8 vectors to perform
the operation. When this is not possible, we fall back to extending the
mask to a suitable i8 vector, extracting from that, and truncating the
result back down to a mask.
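
As a minimal illustration (the function names are placeholders; the calls
mirror two of the tests added below, and the comments describe the codegen
those tests expect):

; Index 8 into nxv64i1 is a multiple of 8 and both types have at least 8
; elements, so the mask is bitcast to an equivalent i8 vector and slid
; down with vslidedown.vx.
define <vscale x 8 x i1> @extract_at_8(<vscale x 64 x i1> %mask) {
  %c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 8)
  ret <vscale x 8 x i1> %c
}

; Index 2 into nxv64i1 cannot be re-expressed in i8 elements, so the mask
; is widened to i8 (vmv.v.i/vmerge.vim), the subvector is slid down, and
; vmsne.vi truncates the result back to a mask.
define <vscale x 2 x i1> @extract_at_2(<vscale x 64 x i1> %mask) {
  %c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 2)
  ret <vscale x 2 x i1> %c
}

declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1>, i64)
declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1>, i64)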

Support was also added for fixed-length truncation to mask types.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D97475
Fraser Cormack 2021-02-25 08:15:25 +00:00
parent 5ace0d2963
commit 5b136b7998
3 changed files with 433 additions and 19 deletions


@@ -419,6 +419,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
for (MVT VT : IntVecVTs) {
@@ -537,12 +538,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Operations below are different between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
continue;
}
@@ -578,7 +582,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
@@ -2119,28 +2122,35 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
EVT VecVT = Src.getValueType();
MVT VecVT = Src.getSimpleValueType();
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
// Be careful not to introduce illegal scalar types at this stage, and be
// careful also about splatting constants: on RV32, vXi64 SPLAT_VECTOR is
// illegal and must be expanded. Since we know that the constants are
// sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
bool IsRV32E64 =
!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
if (!IsRV32E64) {
SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
} else {
SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
if (VecVT.isScalableVector()) {
SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
}
SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc =
DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
DAG.getCondCode(ISD::SETNE), Mask, VL);
return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
@@ -2511,6 +2521,43 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned OrigIdx = Op.getConstantOperandVal(1);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
// We don't have the ability to slide mask vectors down indexed by their i1
// elements; the smallest we can do is i8. Often we are able to bitcast to
// equivalent i8 vectors. Note that when extracting a fixed-length vector
// from a scalable one, we might not necessarily have enough scalable
// elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
if (VecVT.getVectorMinNumElements() >= 8 &&
SubVecVT.getVectorMinNumElements() >= 8) {
assert(OrigIdx % 8 == 0 && "Invalid index");
assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
SubVecVT.getVectorMinNumElements() % 8 == 0 &&
"Unexpected mask vector lowering");
OrigIdx /= 8;
SubVecVT =
MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
SubVecVT.isScalableVector());
VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
VecVT.isScalableVector());
Vec = DAG.getBitcast(VecVT, Vec);
} else {
// We can't slide this mask vector down indexed by its i1 elements.
// This poses a problem when we wish to extract a scalable vector which
// can't be re-expressed as a larger type. Just choose the slow path and
// extend to a larger type, then truncate back down.
// TODO: We could probably improve this when extracting certain fixed-length
// vectors from fixed-length vectors, where we can extract as i8 and shift
// the correct element right to reach the desired subvector.
MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
Op.getOperand(1));
SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
}
}
// If the subvector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of an
// LMUL group contains the specific subvector as we only know the minimum
@@ -2577,8 +2624,12 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
// Now the vector is in the right position, extract our final subvector. This
// should resolve to a COPY.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
DAG.getConstant(0, DL, XLenVT));
Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
DAG.getConstant(0, DL, XLenVT));
// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
SDValue


@@ -358,6 +358,51 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_4(<vscale x 16 x half> %vec
ret <vscale x 2 x half> %c
}
define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_0(<vscale x 64 x i1> %mask) {
; CHECK-LABEL: extract_nxv64i1_nxv8i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
%c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 0)
ret <vscale x 8 x i1> %c
}
define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_8(<vscale x 64 x i1> %mask) {
; CHECK-LABEL: extract_nxv64i1_nxv8i1_8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vslidedown.vx v0, v0, a0
; CHECK-NEXT: ret
%c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 8)
ret <vscale x 8 x i1> %c
}
define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_0(<vscale x 64 x i1> %mask) {
; CHECK-LABEL: extract_nxv64i1_nxv2i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
%c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 0)
ret <vscale x 2 x i1> %c
}
define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_2(<vscale x 64 x i1> %mask) {
; CHECK-LABEL: extract_nxv64i1_nxv2i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vslidedown.vx v25, v8, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 2)
ret <vscale x 2 x i1> %c
}
declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %vec, i64 %idx)
declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 %idx)
@@ -373,3 +418,6 @@ declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<v
declare <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
declare <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 %idx)
declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %vec, i64 %idx)


@@ -152,6 +152,321 @@ define void @extract_v8i32_nxv16i32_8(<vscale x 16 x i32> %x, <8 x i32>* %y) {
ret void
}
define void @extract_v8i1_v64i1_0(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX2-LABEL: extract_v8i1_v64i1_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v8i1_v64i1_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_v64i1_8(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX2-LABEL: extract_v8i1_v64i1_8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v8i1_v64i1_8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 1, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_v64i1_48(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX2-LABEL: extract_v8i1_v64i1_48:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a0, a0, 4
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v8i1_v64i1_48:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, a0, 6
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv2i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v0, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv2i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a1, 8, e8,mf4,ta,mu
; CHECK-NEXT: vslidedown.vi v25, v25, 2
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v26, v25, 0
; CHECK-NEXT: vse1.v v26, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv64i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v0, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_nxv64i1_8(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv64i1_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; CHECK-NEXT: vslidedown.vi v25, v0, 1
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v8i1_nxv64i1_48(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv64i1_48:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; CHECK-NEXT: vslidedown.vi v25, v0, 6
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2-LABEL: extract_v2i1_v64i1_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v2i1_v64i1_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2-LABEL: extract_v2i1_v64i1_2:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v0, (a0)
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v2i1_v64i1_2:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v0, (a0)
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-NEXT: vse1.v v26, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2-LABEL: extract_v2i1_v64i1_42:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a0, a0, 4
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vle1.v v0, (a0)
; LMULMAX2-NEXT: vmv.v.i v26, 0
; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v26, v26, 10
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v2i1_v64i1_42:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, a0, 4
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle1.v v0, (a0)
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 10
; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-NEXT: vse1.v v26, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv2i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v0, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv2i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu
; CHECK-NEXT: vslidedown.vi v25, v25, 2
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v26, v25, 0
; CHECK-NEXT: vse1.v v26, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv64i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vse1.v v0, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv64i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli a1, 2, e8,m8,ta,mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v25, v8, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv64i1_42:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: addi a1, zero, 42
; CHECK-NEXT: vsetivli a2, 2, e8,m8,ta,mu
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v25, v8, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
declare <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)