
[SelectionDAG] Extend FoldConstantVectorArithmetic to SPLAT_VECTOR

This patch extends the SelectionDAG's ability to constant-fold vector
arithmetic to include support for SPLAT_VECTOR. This is not only for
scalable-vector types but also for fixed-length vector types, which
helps Hexagon in a couple of cases.
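As a rough illustration of the new capability (the wrapper function
below is hypothetical; the FoldConstantVectorArithmetic signature is
the one visible in the diff, and getConstant produces a constant
SPLAT_VECTOR for scalable vector types):

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Sketch: fold an ADD of two constant splats. Before this patch the
  // fold bailed out on every scalable vector type; now it can return a
  // splat of 7.
  static SDValue foldSplatAdd(SelectionDAG &DAG, const SDLoc &DL, EVT VT) {
    SDValue LHS = DAG.getConstant(3, DL, VT); // SPLAT_VECTOR of 3
    SDValue RHS = DAG.getConstant(4, DL, VT); // SPLAT_VECTOR of 4
    SDValue Ops[] = {LHS, RHS};
    return DAG.FoldConstantVectorArithmetic(ISD::ADD, DL, VT, Ops);
  }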

The original RISC-V test case in fact triggered an infinite DAGCombine
loop. The pattern `and (truncate v1), (truncate v2)` can be combined to
`truncate (and v1, v2)`, but the truncate can similarly be combined back
into `and (truncate v1), (truncate v2)` (crucially, this reverse combine
fires only when one of `v1` or `v2` is a constant vector).
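In sketch form (hypothetical helper names; only the getNode calls are
real API), the two rewrites feed each other whenever the truncate of
the constant-splat operand is not itself folded away:

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // and (truncate v1), (truncate v2) --> truncate (and v1, v2)
  static SDValue combineAndOfTruncates(SelectionDAG &DAG, const SDLoc &DL,
                                       EVT NarrowVT, SDValue V1, SDValue V2) {
    SDValue WideAnd = DAG.getNode(ISD::AND, DL, V1.getValueType(), V1, V2);
    return DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, WideAnd);
  }

  // truncate (and v1, c) --> and (truncate v1), (truncate c). Unless
  // `truncate c` constant-folds away, this recreates the input of the
  // combine above and the two rewrites ping-pong forever.
  static SDValue combineTruncateOfAnd(SelectionDAG &DAG, const SDLoc &DL,
                                      EVT NarrowVT, SDValue V1, SDValue C) {
    return DAG.getNode(ISD::AND, DL, NarrowVT,
                       DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, V1),
                       DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, C));
  }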

The loop wasn't exposed on fixed-length types because a TRUNCATE of a
constant BUILD_VECTOR was folded into the BUILD_VECTOR itself, whereas
this did not happen for the equivalent (scalable-vector) SPLAT_VECTOR.
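A minimal sketch of that difference (the wrapper is hypothetical;
TRUNCATE is one of the opcodes routed into FoldConstantVectorArithmetic
in the diff below):

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Folds for a constant BUILD_VECTOR both before and after this patch,
  // but for a constant SPLAT_VECTOR only after it; a successful fold
  // yields a narrower constant splat and breaks the loop above.
  static SDValue foldTruncOfConstant(SelectionDAG &DAG, const SDLoc &DL,
                                     EVT NarrowVT, SDValue Vec) {
    SDValue Ops[] = {Vec};
    return DAG.FoldConstantVectorArithmetic(ISD::TRUNCATE, DL, NarrowVT, Ops);
  }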

Reviewed By: RKSimon, craig.topper

Differential Revision: https://reviews.llvm.org/D103246
Fraser Cormack 2021-05-27 13:45:23 +01:00
parent 672a2c4fb0
commit 5ee63b3d02
9 changed files with 240 additions and 456 deletions


@@ -4742,41 +4742,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
if (BV->isConstant()) {
switch (Opcode) {
default:
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
case ISD::FNEG:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::TRUNCATE:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = { Operand };
if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
}
switch (Opcode) {
default:
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
case ISD::FNEG:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::TRUNCATE:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = {Operand};
if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
unsigned OpOpcode = Operand.getNode()->getOpcode();
@@ -5292,30 +5288,26 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (!VT.isVector())
return SDValue();
// TODO: All the folds below are performed lane-by-lane and assume a fixed
// vector width, however we should be able to do constant folds involving
// splat vector nodes too.
if (VT.isScalableVector())
return SDValue();
ElementCount NumElts = VT.getVectorElementCount();
// From this point onwards all vectors are assumed to be fixed width.
unsigned NumElts = VT.getVectorNumElements();
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorNumElements() == NumElts;
Op.getValueType().getVectorElementCount() == NumElts;
};
auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
APInt SplatVal;
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
(BV && BV->isConstant());
return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
(BV && BV->isConstant()) ||
(Op.getOpcode() == ISD::SPLAT_VECTOR &&
ISD::isConstantSplatVector(Op.getNode(), SplatVal));
};
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5332,14 +5324,19 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
return SDValue();
}
// For scalable vector types we know we're dealing with SPLAT_VECTORs. We
// only have one operand to check. For fixed-length vector types we may have
// a combination of BUILD_VECTOR and SPLAT_VECTOR.
unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
for (unsigned i = 0; i != NumElts; i++) {
for (unsigned I = 0; I != NumOperands; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
if (!InBV) {
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
Op.getOpcode() != ISD::SPLAT_VECTOR) {
// We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
@@ -5348,7 +5345,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
continue;
}
SDValue ScalarOp = InBV->getOperand(i);
SDValue ScalarOp =
Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
EVT ScalarVT = ScalarOp.getValueType();
// Build vector (integer) scalar operands may need implicit
@@ -5373,7 +5371,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
ScalarResults.push_back(ScalarResult);
}
SDValue V = getBuildVector(VT, DL, ScalarResults);
SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
: getBuildVector(VT, DL, ScalarResults);
NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
return V;
}


@@ -73,11 +73,8 @@ define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #-85 // =0xffffffffffffffab
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, #1 // =0x1
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: lsr z1.b, z1.b, #1
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, #3
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsr z0.b, z0.b, #1
; CHECK-NEXT: ret
%div = udiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
ret <vscale x 16 x i8> %div
@@ -87,13 +84,10 @@ define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: udiv_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-21845
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: mov z1.h, #1 // =0x1
; CHECK-NEXT: umulh z2.h, p0/m, z2.h, z0.h
; CHECK-NEXT: lsr z2.h, z2.h, #1
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, #3
; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z0.h, z0.h, #1
; CHECK-NEXT: ret
%div = udiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i16> %div
@@ -104,13 +98,10 @@ define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: mov z1.s, #3 // =0x3
; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z0.s
; CHECK-NEXT: lsr z2.s, z2.s, #1
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, #1
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
%div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
ret <vscale x 4 x i32> %div
@@ -121,13 +112,10 @@ define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov z1.d, #3 // =0x3
; CHECK-NEXT: umulh z2.d, p0/m, z2.d, z0.d
; CHECK-NEXT: lsr z2.d, z2.d, #1
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, #1
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z0.d, z0.d, #1
; CHECK-NEXT: ret
%div = udiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
ret <vscale x 2 x i64> %div


@@ -33,15 +33,12 @@ define i32 @f1(<4 x i1>* %a0, <4 x i8> %a1) #0 {
; CHECK-NEXT: r0 = memub(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #0
; CHECK-NEXT: r3:2 = combine(#0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r5:4 = vsxtbh(r1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vsxtbh(r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = r0
; CHECK-NEXT: }
; CHECK-NEXT: {
@@ -142,16 +139,13 @@ define void @f5(<4 x i1>* %a0, i32 %a1) #0 {
; CHECK-LABEL: f5:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #0
; CHECK-NEXT: r3:2 = vsxtbh(r1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r5:4 = vsxtbh(r1)
; CHECK-NEXT: r5:4 = combine(#0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vsxtbh(r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = vcmph.eq(r5:4,r3:2)
; CHECK-NEXT: p0 = vcmph.eq(r3:2,r5:4)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1 = mux(p0,#0,#1)


@@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
; These tests check that the scalable-vector version of this sequence of
; instructions does not get into an infinite DAGCombine loop. It originally
; exposed an infinite loop in which an 'and' of two truncates was promoted
; to the larger value type, then that 'truncate' was split back up into an
; 'and' of two truncates.
; This didn't happen in the fixed-length test because a truncate of the
; constant BUILD_VECTOR is folded into the BUILD_VECTOR itself. The truncate of
; a constant SPLAT_VECTOR didn't follow suit.
define <2 x i16> @fixedlen(<2 x i32> %x) {
; RV32-LABEL: fixedlen:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32,mf2,ta,mu
; RV32-NEXT: vsrl.vi v25, v8, 16
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: vand.vx v25, v25, a0
; RV32-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; RV32-NEXT: vnsrl.wi v8, v25, 0
; RV32-NEXT: ret
;
; RV64-LABEL: fixedlen:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32,mf2,ta,mu
; RV64-NEXT: vsrl.vi v25, v8, 16
; RV64-NEXT: lui a0, 32
; RV64-NEXT: addiw a0, a0, -1
; RV64-NEXT: slli a0, a0, 15
; RV64-NEXT: vand.vx v25, v25, a0
; RV64-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
; RV64-NEXT: vnsrl.wi v8, v25, 0
; RV64-NEXT: ret
%v41 = insertelement <2 x i32> undef, i32 16, i32 0
%v42 = shufflevector <2 x i32> %v41, <2 x i32> undef, <2 x i32> zeroinitializer
%v43 = lshr <2 x i32> %x, %v42
%v44 = trunc <2 x i32> %v43 to <2 x i16>
%v45 = insertelement <2 x i32> undef, i32 -32768, i32 0
%v46 = shufflevector <2 x i32> %v45, <2 x i32> undef, <2 x i32> zeroinitializer
%v47 = trunc <2 x i32> %v46 to <2 x i16>
%v48 = and <2 x i16> %v44, %v47
ret <2 x i16> %v48
}
define <vscale x 2 x i16> @scalable(<vscale x 2 x i32> %x) {
; CHECK-LABEL: scalable:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vsrl.vi v25, v8, 16
; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu
; CHECK-NEXT: vnsrl.wi v25, v25, 0
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vand.vx v8, v25, a0
; CHECK-NEXT: ret
%v41 = insertelement <vscale x 2 x i32> undef, i32 16, i32 0
%v42 = shufflevector <vscale x 2 x i32> %v41, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
%v43 = lshr <vscale x 2 x i32> %x, %v42
%v44 = trunc <vscale x 2 x i32> %v43 to <vscale x 2 x i16>
%v45 = insertelement <vscale x 2 x i32> undef, i32 -32768, i32 0
%v46 = shufflevector <vscale x 2 x i32> %v45, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
%v47 = trunc <vscale x 2 x i32> %v46 to <vscale x 2 x i16>
%v48 = and <vscale x 2 x i16> %v44, %v47
ret <vscale x 2 x i16> %v48
}


@@ -3077,8 +3077,7 @@ define <vscale x 8 x i1> @icmp_eq_ii_nxv8i8() {
; CHECK-LABEL: icmp_eq_ii_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, 5
; CHECK-NEXT: vmseq.vi v0, v25, 2
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: ret
%heada = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
%splata = shufflevector <vscale x 8 x i8> %heada, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer


@@ -29,10 +29,7 @@ define <vscale x 1 x i8> @vdivu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -92,10 +89,7 @@ define <vscale x 2 x i8> @vdivu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -131,10 +125,7 @@ define <vscale x 4 x i8> @vdivu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -170,10 +161,7 @@ define <vscale x 8 x i8> @vdivu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -209,10 +197,7 @@ define <vscale x 16 x i8> @vdivu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 5
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vi v8, v26, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -248,10 +233,7 @@ define <vscale x 32 x i8> @vdivu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 5
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vi v8, v28, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -286,11 +268,8 @@ define <vscale x 64 x i8> @vdivu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 5
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -327,10 +306,7 @@ define <vscale x 1 x i16> @vdivu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -367,10 +343,7 @@ define <vscale x 2 x i16> @vdivu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -407,10 +380,7 @@ define <vscale x 4 x i16> @vdivu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -447,10 +417,7 @@ define <vscale x 8 x i16> @vdivu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 13
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vi v8, v26, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -487,10 +454,7 @@ define <vscale x 16 x i16> @vdivu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 13
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vi v8, v28, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -526,11 +490,8 @@ define <vscale x 32 x i16> @vdivu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 13
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -563,14 +524,11 @@ define <vscale x 1 x i32> @vdivu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
define <vscale x 1 x i32> @vdivu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-LABEL: vdivu_vi_nxv1i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v8, v25, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -603,14 +561,11 @@ define <vscale x 2 x i32> @vdivu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
define <vscale x 2 x i32> @vdivu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vdivu_vi_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v8, v25, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -643,14 +598,11 @@ define <vscale x 4 x i32> @vdivu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
define <vscale x 4 x i32> @vdivu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vdivu_vi_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 29
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v8, v26, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -683,14 +635,11 @@ define <vscale x 8 x i32> @vdivu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
define <vscale x 8 x i32> @vdivu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vdivu_vi_nxv8i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v12, v8, a0
; CHECK-NEXT: vsrl.vi v12, v12, 29
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v8, v28, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -723,14 +672,11 @@ define <vscale x 16 x i32> @vdivu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b) {
define <vscale x 16 x i32> @vdivu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-LABEL: vdivu_vi_nxv16i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v24, v8, a0
; CHECK-NEXT: vsrl.vi v24, v24, 29
; CHECK-NEXT: vmseq.vi v0, v16, 1
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -772,19 +718,16 @@ define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: vmulhu.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v25, v25, a0
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vx v8, v25, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
@@ -827,19 +770,16 @@ define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: vmulhu.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v26, v26, a0
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vx v8, v26, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
@@ -882,19 +822,16 @@ define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmv.v.i v12, -7
; CHECK-NEXT: vmulhu.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v28, v28, a0
; CHECK-NEXT: vmseq.vi v0, v12, 1
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vx v8, v28, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
@@ -937,19 +874,16 @@ define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmv.v.i v24, -7
; CHECK-NEXT: vmulhu.vv v16, v8, v16
; CHECK-NEXT: vmulhu.vv v8, v8, v16
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v16, v16, a0
; CHECK-NEXT: vmseq.vi v0, v24, 1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vsrl.vx v8, v8, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0


@@ -29,10 +29,7 @@ define <vscale x 1 x i8> @vdivu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -68,10 +65,7 @@ define <vscale x 2 x i8> @vdivu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -107,10 +101,7 @@ define <vscale x 4 x i8> @vdivu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -146,10 +137,7 @@ define <vscale x 8 x i8> @vdivu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -185,10 +173,7 @@ define <vscale x 16 x i8> @vdivu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 5
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vi v8, v26, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -224,10 +209,7 @@ define <vscale x 32 x i8> @vdivu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 5
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vi v8, v28, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -262,11 +244,8 @@ define <vscale x 64 x i8> @vdivu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, 33
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 5
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 5
; CHECK-NEXT: ret
%head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0
%splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -303,10 +282,7 @@ define <vscale x 1 x i16> @vdivu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -343,10 +319,7 @@ define <vscale x 2 x i16> @vdivu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -383,10 +356,7 @@ define <vscale x 4 x i16> @vdivu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -423,10 +393,7 @@ define <vscale x 8 x i16> @vdivu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 13
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vi v8, v26, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -463,10 +430,7 @@ define <vscale x 16 x i16> @vdivu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 13
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vi v8, v28, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -502,11 +466,8 @@ define <vscale x 32 x i16> @vdivu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 13
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 13
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -543,10 +504,7 @@ define <vscale x 1 x i32> @vdivu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -583,10 +541,7 @@ define <vscale x 2 x i32> @vdivu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
; CHECK-NEXT: vsrl.vi v8, v25, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -623,10 +578,7 @@ define <vscale x 4 x i32> @vdivu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vi v8, v26, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -663,10 +615,7 @@ define <vscale x 8 x i32> @vdivu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 29
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vi v8, v28, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -702,11 +651,8 @@ define <vscale x 16 x i32> @vdivu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 29
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 29
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0
%splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -739,16 +685,13 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv1i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v26, v26, a0
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
; CHECK-NEXT: vsrl.vx v8, v25, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -781,16 +724,13 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv2i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v28, v28, a0
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
; CHECK-NEXT: vsrl.vx v8, v26, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -823,16 +763,13 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv4i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v12, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v12, v12, a0
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vsrl.vx v8, v28, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -865,16 +802,13 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v24, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v8, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v24, v24, a0
; CHECK-NEXT: vmseq.vi v0, v16, 1
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: vsrl.vx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer


@@ -30,9 +30,6 @@ define <vscale x 1 x i8> @vremu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -72,9 +69,6 @@ define <vscale x 2 x i8> @vremu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -114,9 +108,6 @@ define <vscale x 4 x i8> @vremu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -156,9 +147,6 @@ define <vscale x 8 x i8> @vremu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -198,9 +186,6 @@ define <vscale x 16 x i8> @vremu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 5
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@@ -240,9 +225,6 @@ define <vscale x 32 x i8> @vremu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 5
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@@ -282,9 +264,6 @@ define <vscale x 64 x i8> @vremu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 5
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@@ -325,9 +304,6 @@ define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -368,9 +344,6 @@ define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -411,9 +384,6 @@ define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -454,9 +424,6 @@ define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 13
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@@ -497,9 +464,6 @@ define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 13
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@@ -540,9 +504,6 @@ define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 13
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@@ -578,14 +539,11 @@ define <vscale x 1 x i32> @vremu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv1i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v25, v26, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -621,14 +579,11 @@ define <vscale x 2 x i32> @vremu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v25, v26, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -664,14 +619,11 @@ define <vscale x 4 x i32> @vremu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv4i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 29
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v26, v28, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@@ -707,14 +659,11 @@ define <vscale x 8 x i32> @vremu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv8i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v12, v8, a0
; CHECK-NEXT: vsrl.vi v12, v12, 29
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v28, v12, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 29
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@@ -750,14 +699,11 @@ define <vscale x 16 x i32> @vremu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b) {
define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv16i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v24, v8, a0
; CHECK-NEXT: vsrl.vi v24, v24, 29
; CHECK-NEXT: vmseq.vi v0, v16, 1
; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 29
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@@ -802,19 +748,16 @@ define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: vmulhu.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v25, v25, a0
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -860,19 +803,16 @@ define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: vmulhu.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v26, v26, a0
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@@ -918,19 +858,16 @@ define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmv.v.i v12, -7
; CHECK-NEXT: vmulhu.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v28, v28, a0
; CHECK-NEXT: vmseq.vi v0, v12, 1
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@@ -976,19 +913,16 @@ define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: sw a0, 12(sp)
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmv.v.i v24, -7
; CHECK-NEXT: vmulhu.vv v16, v8, v16
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v16, v16, a0
; CHECK-NEXT: vmseq.vi v0, v24, 1
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16


@@ -30,9 +30,6 @@ define <vscale x 1 x i8> @vremu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -72,9 +69,6 @@ define <vscale x 2 x i8> @vremu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -114,9 +108,6 @@ define <vscale x 4 x i8> @vremu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -156,9 +147,6 @@ define <vscale x 8 x i8> @vremu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 5
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@@ -198,9 +186,6 @@ define <vscale x 16 x i8> @vremu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 5
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@@ -240,9 +225,6 @@ define <vscale x 32 x i8> @vremu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 5
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@@ -282,9 +264,6 @@ define <vscale x 64 x i8> @vremu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 5
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@ -325,9 +304,6 @@ define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -368,9 +344,6 @@ define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -411,9 +384,6 @@ define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 13
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -454,9 +424,6 @@ define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 13
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@ -497,9 +464,6 @@ define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 13
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@ -540,9 +504,6 @@ define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 13
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@ -583,9 +544,6 @@ define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -626,9 +584,6 @@ define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: vsrl.vi v25, v25, 29
; CHECK-NEXT: vmv.v.i v26, 1
; CHECK-NEXT: vmseq.vi v0, v26, -7
; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -669,9 +624,6 @@ define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsrl.vi v26, v26, 29
; CHECK-NEXT: vmv.v.i v28, 1
; CHECK-NEXT: vmseq.vi v0, v28, -7
; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@ -712,9 +664,6 @@ define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsrl.vi v28, v28, 29
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmseq.vi v0, v12, -7
; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@ -755,9 +704,6 @@ define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v16, v16, 29
; CHECK-NEXT: vmv.v.i v24, 1
; CHECK-NEXT: vmseq.vi v0, v24, -7
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
@ -793,16 +739,13 @@ define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv1i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; CHECK-NEXT: vmulhu.vx v25, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v26, v26, a0
; CHECK-NEXT: vmseq.vi v0, v25, 1
; CHECK-NEXT: vmerge.vvm v25, v26, v8, v0
; CHECK-NEXT: vsrl.vx v25, v25, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
@ -838,16 +781,13 @@ define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv2i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
; CHECK-NEXT: vmulhu.vx v26, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v28, v28, a0
; CHECK-NEXT: vmseq.vi v0, v26, 1
; CHECK-NEXT: vmerge.vvm v26, v28, v8, v0
; CHECK-NEXT: vsrl.vx v26, v26, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
@ -883,16 +823,13 @@ define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv4i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v12, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu
; CHECK-NEXT: vmulhu.vx v28, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v12, v12, a0
; CHECK-NEXT: vmseq.vi v0, v28, 1
; CHECK-NEXT: vmerge.vvm v28, v12, v8, v0
; CHECK-NEXT: vsrl.vx v28, v28, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
@ -928,16 +865,13 @@ define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: slli a0, a0, 61
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vmulhu.vx v24, v8, a0
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vmulhu.vx v16, v8, a0
; CHECK-NEXT: addi a0, zero, 61
; CHECK-NEXT: vsrl.vx v24, v24, a0
; CHECK-NEXT: vmseq.vi v0, v16, 1
; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0
; CHECK-NEXT: vsrl.vx v16, v16, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
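The remaining hunks tell the same story at every element width: the `vmv.v.i`/`vmseq.vi`/`vmerge.vvm` triple that implemented the divisor-equals-one select is gone, leaving only the `vmulhu`, shift, `vmul`, and `vsub` of the remainder-by-constant expansion. For reference, a minimal sketch of the IR these tests exercise, reconstructed from the CHECK labels and the usual scalable-splat idiom rather than copied from the test files:

define <vscale x 1 x i8> @vremu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
  ; Splat the constant divisor -7 across a scalable vector.
  %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0
  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
  ; The urem is expanded to mulhu/srl/mul/sub; the divisor==1
  ; select guarding that expansion is what now constant-folds away.
  %vc = urem <vscale x 1 x i8> %va, %splat
  ret <vscale x 1 x i8> %vc
}

Since the divisor is the splat of a known constant, the `seteq` of that splat against splat(1) folds to an all-false mask once SPLAT_VECTOR participates in constant folding, so the select is eliminated before instruction selection ever sees it.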