
[RISCV] Add a vsetvli insert pass that can be extended to be aware of incoming VL/VTYPE from other basic blocks.

This is a replacement for D101938 for inserting vsetvli
instructions where needed. This new version changes how
we track the information in such a way that we can extend
it to be aware of VL/VTYPE changes in other blocks. Given
how much it diverges from the previous patch, I've decided
to abandon that patch and post this one from scratch.

For now the pass consists of a single phase that assumes
the incoming state from other basic blocks is unknown. A
follow-up patch will extend this with a phase to collect
information about how VL/VTYPE change in each block and
a second phase to propagate this information to the entire
function. This will be used by a third phase to do the
vsetvli insertion.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D102737
Craig Topper, 2021-05-24 10:25:27 -07:00
parent 90d57fab4e
commit 9ed3d57a44
27 changed files with 3547 additions and 3454 deletions
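
To make the single-phase behaviour described in the commit message concrete, here is a minimal, self-contained sketch: the state at block entry is treated as unknown, so the first vector instruction always triggers an insertion, and later instructions only trigger one when their VL/VTYPE demand differs from the state established so far. The Demand struct and countVSETVLIs function are illustrative names only, not part of the LLVM code in this commit; the real pass works on MachineInstrs and tracks AVL, SEW, LMUL and policy bits in VSETVLIInfo.

#include <iostream>
#include <optional>
#include <vector>

// What a single vector instruction demands from VL/VTYPE (simplified).
struct Demand {
  unsigned AVL;   // requested application vector length
  unsigned SEW;   // element width in bits
  unsigned LMUL;  // register group multiplier
  bool operator==(const Demand &O) const {
    return AVL == O.AVL && SEW == O.SEW && LMUL == O.LMUL;
  }
};

// Scan one basic block. The incoming state is treated as unknown, so the
// first vector instruction always gets a vsetvli; later ones only get one
// when their demand differs from the state established so far.
unsigned countVSETVLIs(const std::vector<Demand> &Block) {
  std::optional<Demand> Cur;  // std::nullopt models "unknown" at block entry
  unsigned Inserted = 0;
  for (const Demand &D : Block) {
    if (!Cur || !(*Cur == D)) {
      ++Inserted;  // a real pass would emit PseudoVSETVLI/PseudoVSETIVLI here
      Cur = D;
    }
  }
  return Inserted;
}

int main() {
  // Two loads and an add with identical demands need only one vsetvli.
  std::vector<Demand> Block = {{16, 64, 1}, {16, 64, 1}, {16, 64, 1}};
  std::cout << countVSETVLIs(Block) << " vsetvli(s) inserted\n";  // prints 1
}

The planned follow-up phases would replace the "unknown at entry" assumption with per-block entry states computed by a forward data flow over the CFG, as outlined in the commit message above.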


@ -21,10 +21,10 @@ add_public_tablegen_target(RISCVCommonTableGen)
add_llvm_target(RISCVCodeGen
RISCVAsmPrinter.cpp
RISCVCallLowering.cpp
RISCVCleanupVSETVLI.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
RISCVFrameLowering.cpp
RISCVInsertVSETVLI.cpp
RISCVInstrInfo.cpp
RISCVInstructionSelector.cpp
RISCVISelDAGToDAG.cpp


@ -86,7 +86,7 @@ enum VConstraintType {
VMConstraint = 0b100,
};
enum VLMUL {
enum VLMUL : uint8_t {
LMUL_1 = 0,
LMUL_2,
LMUL_4,


@ -46,8 +46,8 @@ void initializeRISCVExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVExpandAtomicPseudoPass();
void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVCleanupVSETVLIPass();
void initializeRISCVCleanupVSETVLIPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,


@ -1,163 +0,0 @@
//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that removes duplicate vsetvli
// instructions within a basic block.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-cleanup-vsetvli"
#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass"
namespace {
class RISCVCleanupVSETVLI : public MachineFunctionPass {
public:
static char ID;
RISCVCleanupVSETVLI() : MachineFunctionPass(ID) {
initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
// This pass modifies the program, but does not modify the CFG
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; }
};
} // end anonymous namespace
char RISCVCleanupVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE,
RISCV_CLEANUP_VSETVLI_NAME, false, false)
static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
// If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
// can't remove this VSETVLI.
if (!PrevVSETVLI || !MI.getOperand(0).isDead())
return false;
// Does this VSET{I}VLI use the same VTYPE immediate?
int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
int64_t VTYPEImm = MI.getOperand(2).getImm();
if (PrevVTYPEImm != VTYPEImm)
return false;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
// If the previous opcode wasn't vsetivli we can't compare them.
if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETIVLI)
return false;
// For VSETIVLI, we can just compare the immediates.
return PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
}
assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
Register AVLReg = MI.getOperand(1).getReg();
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
// If this VSETVLI isn't changing VL, it is redundant.
if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
return true;
// If the previous VSET{I}VLI's output (which isn't X0) is fed into this
// VSETVLI, this one isn't changing VL so is redundant.
// Only perform this on virtual registers to avoid the complexity of having
// to work out if the physical register was clobbered somewhere in between.
if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
return true;
// If the previous opcode isn't vsetvli we can't do any more comparison.
if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
return false;
// Does this VSETVLI use the same AVL register?
if (AVLReg != PrevVSETVLI->getOperand(1).getReg())
return false;
// If the AVLReg is X0 we must be setting VL to VLMAX. Keeping VL unchanged
// was handled above.
if (AVLReg == RISCV::X0) {
// This instruction is setting VL to VLMAX; this is redundant if the
// previous VSETVLI was also setting VL to VLMAX, but not if it was setting
// VL to any other value or leaving VL unchanged.
return PrevOutVL != RISCV::X0;
}
// This vsetvli is redundant.
return true;
}
bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
MachineInstr *PrevVSETVLI = nullptr;
for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
MI.getOpcode() != RISCV::PseudoVSETIVLI) {
if (PrevVSETVLI &&
(MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE))) {
// Old VL/VTYPE is overwritten.
PrevVSETVLI = nullptr;
}
continue;
}
if (isRedundantVSETVLI(MI, PrevVSETVLI)) {
// This VSETVLI is redundant, remove it.
MI.eraseFromParent();
Changed = true;
} else {
// Otherwise update VSET{I}VLI for the next iteration.
PrevVSETVLI = &MI;
}
}
return Changed;
}
bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
if (!ST.hasStdExtV())
return false;
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= runOnMachineBasicBlock(MBB);
return Changed;
}
/// Returns an instance of the Cleanup VSETVLI pass.
FunctionPass *llvm::createRISCVCleanupVSETVLIPass() {
return new RISCVCleanupVSETVLI();
}


@ -240,7 +240,8 @@ bool RISCVExpandPseudo::expandLoadTLSGDAddress(
bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format");
assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 &&
"Unexpected instruction format");
DebugLoc DL = MBBI->getDebugLoc();


@ -6463,107 +6463,9 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
static MachineInstr *elideCopies(MachineInstr *MI,
const MachineRegisterInfo &MRI) {
while (true) {
if (!MI->isFullCopy())
return MI;
if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
return nullptr;
MI = MRI.getVRegDef(MI->getOperand(1).getReg());
if (!MI)
return nullptr;
}
}
static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
int VLIndex, unsigned SEWIndex,
RISCVII::VLMUL VLMul,
bool ForceTailAgnostic) {
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned Log2SEW = MI.getOperand(SEWIndex).getImm();
unsigned SEW = 1 << Log2SEW;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
MachineRegisterInfo &MRI = MF.getRegInfo();
auto BuildVSETVLI = [&]() {
if (VLIndex >= 0) {
Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
const MachineOperand &VLOp = MI.getOperand(VLIndex);
// VL can be a register or an immediate.
if (VLOp.isImm())
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addImm(VLOp.getImm());
Register VLReg = MI.getOperand(VLIndex).getReg();
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(VLReg);
}
// With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill);
};
MachineInstrBuilder MIB = BuildVSETVLI();
// Default to tail agnostic unless the destination is tied to a source. In
// that case the user would have some control over the tail values. The tail
// policy is also ignored on instructions that only update element 0 like
// vmv.s.x or reductions so use agnostic there to match the common case.
// FIXME: This is conservatively correct, but we might want to detect that
// the input is undefined.
bool TailAgnostic = true;
unsigned UseOpIdx;
if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
TailAgnostic = false;
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
if (UseMI) {
UseMI = elideCopies(UseMI, MRI);
if (UseMI && UseMI->isImplicitDef())
TailAgnostic = true;
}
}
// For simplicity we reuse the vtype representation here.
MIB.addImm(RISCVVType::encodeVTYPE(VLMul, SEW,
/*TailAgnostic*/ TailAgnostic,
/*MaskAgnostic*/ false));
// Remove (now) redundant operands from pseudo
if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) {
MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
MI.getOperand(VLIndex).setIsKill(false);
}
return BB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
unsigned NumOperands = MI.getNumExplicitOperands();
int VLIndex = RISCVII::hasVLOp(TSFlags) ? NumOperands - 2 : -1;
unsigned SEWIndex = NumOperands - 1;
bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
}
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");


@ -0,0 +1,396 @@
//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
// The pass consists of a single scan over each basic block, looking for changes
// in VL/VTYPE usage that require a vsetvli to be inserted. We assume the
// VL/VTYPE values from predecessors are unknown, so the first vector instruction
// will always require a new VSETVLI.
//
// TODO: Future enhancements to this pass will take into account VL/VTYPE from
// predecessors.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
namespace {
class VSETVLIInfo {
union {
Register AVLReg;
unsigned AVLImm;
};
enum : uint8_t {
Uninitialized,
AVLIsReg,
AVLIsImm,
Unknown,
} State = Uninitialized;
// Fields from VTYPE.
RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
uint8_t SEW = 0;
bool TailAgnostic = false;
bool MaskAgnostic = false;
public:
VSETVLIInfo() : AVLImm(0) {}
bool isValid() const { return State != Uninitialized; }
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
void setAVLReg(Register Reg) {
AVLReg = Reg;
State = AVLIsReg;
}
void setAVLImm(unsigned Imm) {
AVLImm = Imm;
State = AVLIsImm;
}
bool hasAVLImm() const { return State == AVLIsImm; }
bool hasAVLReg() const { return State == AVLIsReg; }
Register getAVLReg() const {
assert(hasAVLReg());
return AVLReg;
}
unsigned getAVLImm() const {
assert(hasAVLImm());
return AVLImm;
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!isUnknown() && !Other.isUnknown() &&
"Can't compare AVL in unknown state");
if (hasAVLReg() && Other.hasAVLReg())
return getAVLReg() == Other.getAVLReg();
if (hasAVLImm() && Other.hasAVLImm())
return getAVLImm() == Other.getAVLImm();
return false;
}
void setVTYPE(unsigned VType) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = RISCVVType::getVLMUL(VType);
SEW = RISCVVType::getSEW(VType);
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
}
unsigned encodeVTYPE() const {
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}
bool hasSameVTYPE(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!isUnknown() && !Other.isUnknown() &&
"Can't compare VTYPE in unknown state");
return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
Other.MaskAgnostic);
}
bool isCompatible(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
// Nothing is compatible with Unknown.
if (isUnknown() || Other.isUnknown())
return false;
// If other doesn't need an AVLReg and the SEW matches, consider it
// compatible.
if (Other.hasAVLReg() && Other.AVLReg == RISCV::NoRegister) {
if (SEW == Other.SEW)
return true;
}
// VTypes must match.
if (!hasSameVTYPE(Other))
return false;
if (hasAVLImm() != Other.hasAVLImm())
return false;
if (hasAVLImm())
return getAVLImm() == Other.getAVLImm();
return getAVLReg() == Other.getAVLReg();
}
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
public:
static char ID;
RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info);
bool emitVSETVLIs(MachineBasicBlock &MBB);
};
} // end anonymous namespace
char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
static MachineInstr *elideCopies(MachineInstr *MI,
const MachineRegisterInfo *MRI) {
while (true) {
if (!MI->isFullCopy())
return MI;
if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
return nullptr;
MI = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!MI)
return nullptr;
}
}
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
unsigned NumOperands = MI.getNumExplicitOperands();
RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
unsigned SEW = 1 << Log2SEW;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
// Default to tail agnostic unless the destination is tied to a source (and
// that source is not undef), in which case the user would have some control
// over the tail values. The tail policy is also ignored on instructions that
// only update element 0, like vmv.s.x or reductions, so use agnostic there
// to match the common case.
// FIXME: This is conservatively correct, but we might want to detect that
// the input is undefined.
bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
bool TailAgnostic = true;
unsigned UseOpIdx;
if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
TailAgnostic = false;
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
if (UseMI) {
UseMI = elideCopies(UseMI, MRI);
if (UseMI && UseMI->isImplicitDef())
TailAgnostic = true;
}
}
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
if (VLOp.isImm())
InstrInfo.setAVLImm(VLOp.getImm());
else
InstrInfo.setAVLReg(VLOp.getReg());
} else
InstrInfo.setAVLReg(RISCV::NoRegister);
InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
/*MaskAgnostic*/ false);
return InstrInfo;
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info) {
DebugLoc DL = MI.getDebugLoc();
if (Info.hasAVLImm()) {
// TODO: Use X0 as the destination.
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addImm(Info.getAVLImm())
.addImm(Info.encodeVTYPE());
return;
}
Register AVLReg = Info.getAVLReg();
if (AVLReg == RISCV::NoRegister) {
BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
return;
}
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(Info.getAVLReg())
.addImm(Info.encodeVTYPE());
}
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
Register AVLReg = MI.getOperand(1).getReg();
assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
"Can't handle X0, X0 vsetvli yet");
NewInfo.setAVLReg(AVLReg);
} else {
assert(MI.getOpcode() == RISCV::PseudoVSETIVLI);
NewInfo.setAVLImm(MI.getOperand(1).getImm());
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
return NewInfo;
}
bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
bool MadeChange = false;
// Assume predecessor state is unknown.
VSETVLIInfo CurInfo;
CurInfo.setUnknown();
for (MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
MI.getOperand(4).getReg() == RISCV::VTYPE &&
"Unexpected operands where VL and VTYPE should be");
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
MadeChange = true;
CurInfo = getInfoForVSETVLI(MI);
continue;
}
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
if (RISCVII::hasVLOp(TSFlags)) {
MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
if (VLOp.isReg()) {
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
bool NeedVSETVLI = true;
if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
NeedVSETVLI = false;
// We didn't find a compatible value. If our AVL is a virtual register,
// it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
NeedVSETVLI = false;
}
}
}
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
if (NeedVSETVLI) {
insertVSETVLI(MBB, MI, NewInfo);
CurInfo = NewInfo;
}
// If we found an instruction, we at least changed its operands.
MadeChange = true;
}
// If this is something that updates VL/VTYPE in a way we don't know about,
// set the state to unknown.
if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
VSETVLIInfo NewInfo;
NewInfo.setUnknown();
CurInfo = NewInfo;
}
}
return MadeChange;
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
if (!ST.hasStdExtV())
return false;
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= emitVSETVLIs(MBB);
return Changed;
}
/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}


@ -619,8 +619,6 @@ class VPseudoUSLoadNoMask<VReg RetClass, bits<7> EEW, bit isFF> :
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -637,9 +635,7 @@ class VPseudoUSLoadMask<VReg RetClass, bits<7> EEW, bit isFF> :
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "$rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -654,8 +650,6 @@ class VPseudoSLoadNoMask<VReg RetClass, bits<7> EEW>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -672,9 +666,7 @@ class VPseudoSLoadMask<VReg RetClass, bits<7> EEW>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "$rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -690,8 +682,6 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -710,9 +700,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -727,8 +715,6 @@ class VPseudoUSStoreNoMask<VReg StClass, bits<7> EEW>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -743,8 +729,6 @@ class VPseudoUSStoreMask<VReg StClass, bits<7> EEW>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -758,8 +742,6 @@ class VPseudoSStoreNoMask<VReg StClass, bits<7> EEW>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -774,8 +756,6 @@ class VPseudoSStoreMask<VReg StClass, bits<7> EEW>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -790,8 +770,6 @@ class VPseudoUnaryNoDummyMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -804,8 +782,6 @@ class VPseudoNullaryNoMask<VReg RegClass>:
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -819,9 +795,7 @@ class VPseudoNullaryMask<VReg RegClass>:
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints ="$rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -836,8 +810,6 @@ class VPseudoNullaryPseudoM<string BaseInst>
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
// BaseInstr is not used in RISCVExpandPseudoInsts pass.
@ -853,9 +825,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint =
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Constraint;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -870,9 +840,7 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -887,8 +855,6 @@ class VPseudoMaskUnarySOutMask:
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -906,9 +872,7 @@ class VPseudoUnaryAnyMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "@earlyclobber $rd, $rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -925,9 +889,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Constraint;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -943,8 +905,6 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -960,8 +920,6 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -979,9 +937,7 @@ class VPseudoBinaryMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1001,9 +957,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1025,9 +979,7 @@ class VPseudoBinaryCarryIn<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Constraint;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 0;
@ -1047,9 +999,7 @@ class VPseudoTernaryNoMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1068,9 +1018,7 @@ class VPseudoAMOWDNoMask<VReg RetClass,
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 1;
let usesCustomInserter = 1;
let Constraints = "$vd_wd = $vd";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1088,9 +1036,7 @@ class VPseudoAMOWDMask<VReg RetClass,
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 1;
let usesCustomInserter = 1;
let Constraints = "$vd_wd = $vd";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -1131,8 +1077,6 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1148,9 +1092,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "$rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1166,8 +1108,6 @@ class VPseudoSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1183,9 +1123,7 @@ class VPseudoSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "$rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1201,11 +1139,9 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> L
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
// For vector indexed segment loads, the destination vector register groups
// cannot overlap the source vector register group
let Constraints = "@earlyclobber $rd";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1222,11 +1158,9 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMU
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
// For vector indexed segment loads, the destination vector register groups
// cannot overlap the source vector register group
let Constraints = "@earlyclobber $rd, $rd = $merge";
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
@ -1241,8 +1175,6 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1258,8 +1190,6 @@ class VPseudoUSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -1273,8 +1203,6 @@ class VPseudoSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1290,8 +1218,6 @@ class VPseudoSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -1307,8 +1233,6 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3>
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
@ -1325,8 +1249,6 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3> LM
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Uses = [VL, VTYPE];
let HasVLOp = 1;
let HasSEWOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@ -3521,7 +3443,7 @@ let Defs = [VXSAT], hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Averaging Add and Subtract
//===----------------------------------------------------------------------===//
let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
let Uses = [VXRM], hasSideEffects = 1 in {
defm PseudoVAADDU : VPseudoBinaryV_VV_VX;
defm PseudoVAADD : VPseudoBinaryV_VV_VX;
defm PseudoVASUBU : VPseudoBinaryV_VV_VX;
@ -3531,14 +3453,14 @@ let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
//===----------------------------------------------------------------------===//
let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
defm PseudoVSMUL : VPseudoBinaryV_VV_VX;
}
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Scaling Shift Instructions
//===----------------------------------------------------------------------===//
let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
let Uses = [VXRM], hasSideEffects = 1 in {
defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
}
@ -3546,7 +3468,7 @@ let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// 13.5. Vector Narrowing Fixed-Point Clip Instructions
//===----------------------------------------------------------------------===//
let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI;
defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
}
@ -3792,8 +3714,7 @@ defm PseudoVID : VPseudoMaskNullaryV;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtV] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
Uses = [VL, VTYPE] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
@ -3816,8 +3737,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtV, HasStdExtF] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
Uses = [VL, VTYPE] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
foreach f = FPList.fpinfo in {
let VLMul = m.value in {


@ -39,7 +39,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeGlobalISel(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVCleanupVSETVLIPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT) {
@ -191,8 +191,7 @@ void RISCVPassConfig::addPreEmitPass2() {
}
void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None) {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVCleanupVSETVLIPass());
}
addPass(createRISCVInsertVSETVLIPass());
}


@ -1,56 +0,0 @@
# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \
# RUN: -start-before=finalize-isel -stop-after=finalize-isel -o - \
# RUN: | FileCheck --check-prefix=POST-INSERTER %s
# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \
# RUN: -start-before=finalize-isel -o - \
# RUN: | FileCheck --check-prefix=CODEGEN %s
--- |
define void @vadd_vint64m1(
<vscale x 1 x i64> *%pc,
<vscale x 1 x i64> *%pa,
<vscale x 1 x i64> *%pb,
i64 %vl)
{
ret void
}
...
---
name: vadd_vint64m1
tracksRegLiveness: true
body: |
bb.0 (%ir-block.0):
liveins: $x10, $x11, $x12, $x13
%3:gpr = COPY $x13
%2:gpr = COPY $x12
%1:gpr = COPY $x11
%0:gpr = COPY $x10
%4:vr = PseudoVLE64_V_M1 %1, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
%5:vr = PseudoVLE64_V_M1 %2, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8)
%6:vr = PseudoVADD_VV_M1 killed %4, killed %5, %3, 6, implicit $vl, implicit $vtype
PseudoVSE64_V_M1 killed %6, %0, %3, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8)
PseudoRET
...
# POST-INSERTER: %0:gpr = COPY $x13
# POST-INSERTER: %1:gpr = COPY $x12
# POST-INSERTER: %2:gpr = COPY $x11
# POST-INSERTER: %3:gpr = COPY $x10
# POST-INSERTER: dead %7:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
# POST-INSERTER: %4:vr = PseudoVLE64_V_M1 %2, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
# POST-INSERTER: dead %8:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
# POST-INSERTER: %5:vr = PseudoVLE64_V_M1 %1, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8)
# POST-INSERTER: dead %9:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
# POST-INSERTER: %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, $noreg, 6, implicit $vl, implicit $vtype
# POST-INSERTER: dead %10:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
# POST-INSERTER: PseudoVSE64_V_M1 killed %6, %3, $noreg, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8)
# CODEGEN: vsetvli a3, a3, e64,m1,ta,mu
# CODEGEN-NEXT: vle64.v v25, (a1)
# CODEGEN-NEXT: vle64.v v26, (a2)
# CODEGEN-NEXT: vadd.vv v25, v25, v26
# CODEGEN-NEXT: vse64.v v25, (a0)
# CODEGEN-NEXT: ret


@ -1,32 +0,0 @@
; This test shows the evolution of RVV pseudo instructions within isel.
; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o %t.pre.mir \
; RUN: -stop-before=finalize-isel
; RUN: cat %t.pre.mir | FileCheck --check-prefix=PRE-INSERTER %s
; RUN: llc -mtriple riscv64 -mattr=+experimental-v %t.pre.mir -o %t.post.mir \
; RUN: -start-before=finalize-isel -stop-after=finalize-isel
; RUN: cat %t.post.mir | FileCheck --check-prefix=POST-INSERTER %s
define void @vadd_vint64m1(
<vscale x 1 x i64> *%pc,
<vscale x 1 x i64> *%pa,
<vscale x 1 x i64> *%pb)
{
%va = load <vscale x 1 x i64>, <vscale x 1 x i64>* %pa
%vb = load <vscale x 1 x i64>, <vscale x 1 x i64>* %pb
%vc = add <vscale x 1 x i64> %va, %vb
store <vscale x 1 x i64> %vc, <vscale x 1 x i64> *%pc
ret void
}
; PRE-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8)
; PRE-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8)
; PRE-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $x0, 6, implicit $vl, implicit $vtype
; PRE-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8)
; POST-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8)
; POST-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8)
; POST-INSERTER: dead %6:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype
; POST-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $noreg, 6, implicit $vl, implicit $vtype
; POST-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8)


@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
--- |
define void @add_scalable_offset(
@ -55,7 +55,7 @@ body: |
; CHECK: PseudoRET
%1:gpr = COPY $x11
%0:gpr = COPY $x10
%2:vr = PseudoVLE64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
%2:vr = PseudoVLE64_V_M1 %0, %1, 6 :: (load unknown-size from %ir.pa, align 8)
%3:gpr = ADDI %stack.2, 0
VS1R_V killed %2:vr, %3:gpr
PseudoRET


@ -1,46 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
# Make sure we don't combine these VSET{I}VLIs in the cleanup pass. We cannot
# differentiate AVL values if the opcode of the previous one is different from
# the current one.
--- |
; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll'
source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
define void @cleanup_vsetivli() #0 {
ret void
}
attributes #0 = { "target-features"="+experimental-v" }
...
---
name: cleanup_vsetivli
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
; CHECK-LABEL: name: cleanup_vsetivli
; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
dead %2:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %4:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
PseudoRET
...


@ -1,79 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
--- |
; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
define void @cleanup_vsetvli0() #0 {
ret void
}
define void @cleanup_vsetvli1() #0 {
ret void
}
attributes #0 = { "target-features"="+experimental-v" }
...
---
# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
# keeps the previous value of VL; the second sets it to VLMAX. We can't remove
# the first since we can't tell if this is a change of VL.
name: cleanup_vsetvli0
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
; CHECK-LABEL: name: cleanup_vsetvli0
; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
PseudoRET
...
---
# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
# physical register which is clobbered by a later instruction.
name: cleanup_vsetvli1
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x3
; CHECK-LABEL: name: cleanup_vsetvli1
; CHECK: liveins: $x3
; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
; CHECK: $x1 = COPY $x3
; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoRET
%0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
%2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
$x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
$x1 = COPY $x3
dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
PseudoRET
...


@ -3667,11 +3667,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
@ -3726,12 +3727,13 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -3756,8 +3758,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB3_6
; LMULMAX2-RV32-NEXT: .LBB3_5:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -3900,11 +3902,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a3, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
; LMULMAX1-RV32-NEXT: lui a1, 4112
; LMULMAX1-RV32-NEXT: addi a2, a1, 257
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
@ -3959,12 +3962,13 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
; LMULMAX1-RV32-NEXT: or a1, a5, a1
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
@ -3989,8 +3993,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB3_6
; LMULMAX1-RV32-NEXT: .LBB3_5:
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
; LMULMAX1-RV32-NEXT: or a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
@ -11120,11 +11124,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
@ -11179,12 +11184,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11209,8 +11215,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_6
; LMULMAX2-RV32-NEXT: .LBB7_5:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11237,12 +11243,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
; LMULMAX2-RV32-NEXT: # %bb.7:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11267,8 +11274,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_9
; LMULMAX2-RV32-NEXT: .LBB7_8:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11295,12 +11302,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
; LMULMAX2-RV32-NEXT: # %bb.10:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11325,8 +11333,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_12
; LMULMAX2-RV32-NEXT: .LBB7_11:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11544,13 +11552,14 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11604,11 +11613,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11663,11 +11673,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11721,11 +11732,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX1-RV32-NEXT: # %bb.10:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4


@ -2538,9 +2538,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
@ -2548,13 +2545,16 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -2570,9 +2570,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB3_3
; LMULMAX2-RV32-NEXT: .LBB3_2:
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -2590,10 +2590,11 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
@ -2719,9 +2720,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX1-RV32-NEXT: lui a1, 349525
; LMULMAX1-RV32-NEXT: addi a4, a1, 1365
; LMULMAX1-RV32-NEXT: lui a1, 209715
@ -2729,13 +2727,16 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: addi a1, a5, -1
; LMULMAX1-RV32-NEXT: not a5, a5
; LMULMAX1-RV32-NEXT: and a1, a5, a1
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: and a5, a5, a4
; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@ -2751,9 +2752,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a5, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB3_3
; LMULMAX1-RV32-NEXT: .LBB3_2:
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a5
; LMULMAX1-RV32-NEXT: addi a1, a5, -1
; LMULMAX1-RV32-NEXT: not a5, a5
; LMULMAX1-RV32-NEXT: and a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: and a5, a5, a4
; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@ -2771,10 +2772,11 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a5
@ -7647,9 +7649,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
@ -7657,13 +7656,16 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -7679,9 +7681,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_3
; LMULMAX2-RV32-NEXT: .LBB7_2:
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -7699,10 +7701,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
@ -7741,10 +7744,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
; LMULMAX2-RV32-NEXT: # %bb.7:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
@ -7783,10 +7787,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
; LMULMAX2-RV32-NEXT: # %bb.10:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
@ -7962,25 +7967,25 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a6, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v26, (a6)
; LMULMAX1-RV32-NEXT: addi a7, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v26, (a7)
; LMULMAX1-RV32-NEXT: sw zero, 28(sp)
; LMULMAX1-RV32-NEXT: sw zero, 20(sp)
; LMULMAX1-RV32-NEXT: addi a7, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: lui a2, 349525
; LMULMAX1-RV32-NEXT: addi a5, a2, 1365
; LMULMAX1-RV32-NEXT: lui a2, 209715
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: lui a1, 349525
; LMULMAX1-RV32-NEXT: addi a5, a1, 1365
; LMULMAX1-RV32-NEXT: lui a1, 209715
; LMULMAX1-RV32-NEXT: addi a4, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi t0, a1, -241
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
@ -7999,9 +8004,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_3
; LMULMAX1-RV32-NEXT: .LBB7_2:
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -8019,13 +8024,14 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -8060,15 +8066,16 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -8104,13 +8111,14 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX1-RV32-NEXT: # %bb.10:
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -8149,7 +8157,7 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v26, (a6)
; LMULMAX1-RV32-NEXT: vse64.v v26, (a7)
; LMULMAX1-RV32-NEXT: addi sp, sp, 32
; LMULMAX1-RV32-NEXT: ret
;


@ -26,7 +26,6 @@ define half @vreduce_ord_fadd_v1f16(<1 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <1 x half>, <1 x half>* %x
@ -45,7 +44,6 @@ define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -63,7 +61,6 @@ define half @vreduce_ord_fadd_v2f16(<2 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
@ -82,7 +79,6 @@ define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -100,7 +96,6 @@ define half @vreduce_ord_fadd_v4f16(<4 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -154,7 +149,6 @@ define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -172,7 +166,6 @@ define half @vreduce_ord_fadd_v16f16(<16 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <16 x half>, <16 x half>* %x
@ -192,7 +185,6 @@ define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -211,7 +203,6 @@ define half @vreduce_ord_fadd_v32f16(<32 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <32 x half>, <32 x half>* %x
@ -231,7 +222,6 @@ define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -250,7 +240,6 @@ define half @vreduce_ord_fadd_v64f16(<64 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <64 x half>, <64 x half>* %x
@ -273,7 +262,6 @@ define half @vreduce_fadd_v128f16(<128 x half>* %x, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -294,13 +282,11 @@ define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v16, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
@ -332,7 +318,6 @@ define float @vreduce_ord_fadd_v1f32(<1 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <1 x float>, <1 x float>* %x
@ -351,7 +336,6 @@ define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) {
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -369,7 +353,6 @@ define float @vreduce_ord_fadd_v2f32(<2 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
@ -423,7 +406,6 @@ define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -441,7 +423,6 @@ define float @vreduce_ord_fadd_v8f32(<8 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <8 x float>, <8 x float>* %x
@ -460,7 +441,6 @@ define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -478,7 +458,6 @@ define float @vreduce_ord_fadd_v16f32(<16 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <16 x float>, <16 x float>* %x
@ -498,7 +477,6 @@ define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -517,7 +495,6 @@ define float @vreduce_ord_fadd_v32f32(<32 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <32 x float>, <32 x float>* %x
@ -540,7 +517,6 @@ define float @vreduce_fadd_v64f32(<64 x float>* %x, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -561,13 +537,11 @@ define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v16, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <64 x float>, <64 x float>* %x
@ -652,7 +626,6 @@ define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -670,7 +643,6 @@ define double @vreduce_ord_fadd_v4f64(<4 x double>* %x, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -689,7 +661,6 @@ define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -707,7 +678,6 @@ define double @vreduce_ord_fadd_v8f64(<8 x double>* %x, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v28, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <8 x double>, <8 x double>* %x
@ -726,7 +696,6 @@ define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -744,7 +713,6 @@ define double @vreduce_ord_fadd_v16f64(<16 x double>* %x, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <16 x double>, <16 x double>* %x
@ -766,7 +734,6 @@ define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -786,13 +753,11 @@ define double @vreduce_ord_fadd_v32f64(<32 x double>* %x, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v16, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
@ -813,7 +778,6 @@ define half @vreduce_fmin_v2f16(<2 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
@ -834,7 +798,6 @@ define half @vreduce_fmin_v4f16(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -853,7 +816,6 @@ define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -872,7 +834,6 @@ define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -897,7 +858,6 @@ define half @vreduce_fmin_v128f16(<128 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
@ -918,7 +878,6 @@ define float @vreduce_fmin_v2f32(<2 x float>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
@ -1005,7 +964,6 @@ define float @vreduce_fmin_v128f32(<128 x float>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <128 x float>, <128 x float>* %x
@ -1046,7 +1004,6 @@ define double @vreduce_fmin_v4f64(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1065,7 +1022,6 @@ define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1084,7 +1040,6 @@ define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1108,7 +1063,6 @@ define double @vreduce_fmin_v32f64(<32 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
@ -1129,7 +1083,6 @@ define half @vreduce_fmax_v2f16(<2 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
@ -1150,7 +1103,6 @@ define half @vreduce_fmax_v4f16(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -1169,7 +1121,6 @@ define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -1188,7 +1139,6 @@ define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
@ -1213,7 +1163,6 @@ define half @vreduce_fmax_v128f16(<128 x half>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
@ -1234,7 +1183,6 @@ define float @vreduce_fmax_v2f32(<2 x float>* %x) {
; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v25, v26
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
@ -1321,7 +1269,6 @@ define float @vreduce_fmax_v128f32(<128 x float>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <128 x float>, <128 x float>* %x
@ -1362,7 +1309,6 @@ define double @vreduce_fmax_v4f64(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1381,7 +1327,6 @@ define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1400,7 +1345,6 @@ define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v26, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
@ -1424,7 +1368,6 @@ define double @vreduce_fmax_v32f64(<32 x double>* %x) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%v = load <32 x double>, <32 x double>* %x

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -15,8 +15,7 @@ define i64 @test(<vscale x 1 x i64> %0) nounwind {
; CHECK: bb.0.entry:
; CHECK: liveins: $v8
; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8
; CHECK: dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6, implicit $vl, implicit $vtype
; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6
; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a)
; CHECK: $x10 = COPY [[LD]]
; CHECK: PseudoRET implicit $x10


@ -15,20 +15,21 @@ body: |
liveins: $v0, $v1, $v2, $v3
; CHECK-LABEL: name: mask_reg_alloc
; CHECK: liveins: $v0, $v1, $v2, $v3
; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype
; CHECK: dead renamable $x10 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype
; CHECK: renamable $v0 = COPY killed renamable $v1
; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype
; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, $noreg, -1, implicit $vl, implicit $vtype
; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype
; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, 1, 3, implicit $vl, implicit $vtype
; CHECK: PseudoRET implicit $v0
%0:vr = COPY $v0
%1:vr = COPY $v1
%2:vr = COPY $v2
%3:vr = COPY $v3
%4:vmv0 = COPY %0
%5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, $noreg, -1, implicit $vl, implicit $vtype
%5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, 1, 3
%6:vmv0 = COPY %1
%7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, $noreg, -1, implicit $vl, implicit $vtype
%8:vr = PseudoVADD_VV_M1 killed %5, killed %7, $noreg, -1, implicit $vl, implicit $vtype
%7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, 1, 3
%8:vr = PseudoVADD_VV_M1 killed %5, killed %7, 1, 3
$v0 = COPY %8
PseudoRET implicit $v0
...


@ -52,8 +52,7 @@ body: |
; CHECK: $v0 = COPY [[COPY]]
; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]]
; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load 64 from %ir.a, align 8)
; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]]
; CHECK: PseudoRET implicit $v8m8
%1:vr = COPY $v0
@ -61,7 +60,7 @@ body: |
$v0 = COPY %1
%3:vrm8 = IMPLICIT_DEF
%4:vrm8nov0 = COPY %3
%2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
%2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load 64 from %ir.a, align 8)
$v8m8 = COPY %2
PseudoRET implicit $v8m8


@ -13,7 +13,6 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -28,7 +27,6 @@ define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@ -44,7 +42,6 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
; CHECK-NEXT: ret
@ -59,7 +56,6 @@ define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@ -102,7 +98,6 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -117,7 +112,6 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@ -160,7 +154,6 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
; CHECK-NEXT: ret
@ -175,7 +168,6 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@ -218,7 +210,6 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -233,7 +224,6 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@ -249,7 +239,6 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
; CHECK-NEXT: ret
@ -264,7 +253,6 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-NEXT: vfmv.v.f v25, fa0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfredosum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@ -282,7 +270,6 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@ -298,7 +285,6 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@ -314,7 +300,6 @@ define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@ -332,7 +317,6 @@ define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@ -368,7 +352,6 @@ define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@ -386,7 +369,6 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@ -402,7 +384,6 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@ -418,7 +399,6 @@ define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@ -452,7 +432,6 @@ define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@ -472,7 +451,6 @@ define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@ -534,7 +512,6 @@ define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@ -552,7 +529,6 @@ define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@ -572,7 +548,6 @@ define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vfredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@ -590,7 +565,6 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@ -606,7 +580,6 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@ -622,7 +595,6 @@ define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@ -640,7 +612,6 @@ define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@ -676,7 +647,6 @@ define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@ -694,7 +664,6 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@ -710,7 +679,6 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@ -726,7 +694,6 @@ define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@ -760,7 +727,6 @@ define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@ -780,7 +746,6 @@ define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@ -842,7 +807,6 @@ define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@ -860,7 +824,6 @@ define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@ -880,7 +843,6 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vfredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)


@ -10,7 +10,6 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@ -26,7 +25,6 @@ define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@ -43,7 +41,6 @@ define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@ -59,7 +56,6 @@ define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@ -76,7 +72,6 @@ define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@ -92,7 +87,6 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@ -108,7 +102,6 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@ -124,7 +117,6 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@ -140,7 +132,6 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@ -156,7 +147,6 @@ define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@ -173,7 +163,6 @@ define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@ -189,7 +178,6 @@ define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@ -206,7 +194,6 @@ define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@ -222,7 +209,6 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@ -238,7 +224,6 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@ -254,7 +239,6 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@ -270,7 +254,6 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@ -286,7 +269,6 @@ define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@ -303,7 +285,6 @@ define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@ -319,7 +300,6 @@ define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@ -336,7 +316,6 @@ define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@ -352,7 +331,6 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@ -368,7 +346,6 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@ -384,7 +361,6 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@ -400,7 +376,6 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@ -416,7 +391,6 @@ define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@ -433,7 +407,6 @@ define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@ -449,7 +422,6 @@ define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@ -467,7 +439,6 @@ define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@ -483,7 +454,6 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@ -499,7 +469,6 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@ -515,7 +484,6 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@ -531,7 +499,6 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@ -547,7 +514,6 @@ define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@ -564,7 +530,6 @@ define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@ -580,7 +545,6 @@ define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@ -598,7 +562,6 @@ define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@ -614,7 +577,6 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@ -630,7 +592,6 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@ -646,7 +607,6 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@ -777,7 +737,6 @@ define i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@ -793,7 +752,6 @@ define i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@ -810,7 +768,6 @@ define i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@ -826,7 +783,6 @@ define i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@ -844,7 +800,6 @@ define i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@ -860,7 +815,6 @@ define i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@ -876,7 +830,6 @@ define i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@ -892,7 +845,6 @@ define i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@ -1023,7 +975,6 @@ define i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@ -1039,7 +990,6 @@ define i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@ -1056,7 +1006,6 @@ define i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@ -1072,7 +1021,6 @@ define i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@ -1090,7 +1038,6 @@ define i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@ -1106,7 +1053,6 @@ define i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@ -1122,7 +1068,6 @@ define i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@ -1138,7 +1083,6 @@ define i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@ -1314,7 +1258,6 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1334,7 +1277,6 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1360,7 +1302,6 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1381,7 +1322,6 @@ define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1409,7 +1349,6 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1430,7 +1369,6 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1450,7 +1388,6 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1470,7 +1407,6 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1490,7 +1426,6 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1510,7 +1445,6 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1536,7 +1470,6 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1557,7 +1490,6 @@ define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1585,7 +1517,6 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1606,7 +1537,6 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1626,7 +1556,6 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
@ -1646,7 +1575,6 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
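
Each hunk above removes a single CHECK-NEXT line: the "vsetvli zero, zero, e*,m1,ta,mu" that used to sit between the reduction and the following vmv.x.s. vmv.x.s reads one element and depends only on SEW, not on LMUL or VL, so the "vsetvli a0, zero, ..." already issued for the reduction leaves VTYPE in a compatible state and the new insertion pass no longer emits the extra toggle. A minimal reproducer assembled from the checks above — the ret and the declare are inferred, since the hunks stop short of them:

  declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)

  ; The scalar result is read with vmv.x.s, which only needs a matching SEW,
  ; so no vsetvli is required between vredmax.vs and vmv.x.s.
  define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
    %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
    ret i8 %red
  }

Before this patch the generated code switched to e8,m1 before reading the scalar result; with the insert pass it simply stays at the e8,mf4 configuration set up for vredmax.vs.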


@ -10,7 +10,6 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@ -26,7 +25,6 @@ define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@ -43,7 +41,6 @@ define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@ -59,7 +56,6 @@ define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@ -76,7 +72,6 @@ define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@ -92,7 +87,6 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@ -108,7 +102,6 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@ -124,7 +117,6 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@ -140,7 +132,6 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@ -156,7 +147,6 @@ define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@ -173,7 +163,6 @@ define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@ -189,7 +178,6 @@ define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@ -206,7 +194,6 @@ define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@ -222,7 +209,6 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@ -238,7 +224,6 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@ -254,7 +239,6 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@ -270,7 +254,6 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@ -286,7 +269,6 @@ define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@ -303,7 +285,6 @@ define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@ -319,7 +300,6 @@ define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@ -336,7 +316,6 @@ define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@ -352,7 +331,6 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@ -368,7 +346,6 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@ -384,7 +361,6 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@ -400,7 +376,6 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@ -416,7 +391,6 @@ define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@ -433,7 +407,6 @@ define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@ -449,7 +422,6 @@ define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@ -467,7 +439,6 @@ define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@ -483,7 +454,6 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@ -499,7 +469,6 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@ -515,7 +484,6 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@ -531,7 +499,6 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@ -547,7 +514,6 @@ define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@ -564,7 +530,6 @@ define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@ -580,7 +545,6 @@ define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@ -598,7 +562,6 @@ define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@ -614,7 +577,6 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@ -630,7 +592,6 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@ -646,7 +607,6 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@ -777,7 +737,6 @@ define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@ -793,7 +752,6 @@ define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@ -810,7 +768,6 @@ define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@ -826,7 +783,6 @@ define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@ -844,7 +800,6 @@ define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@ -860,7 +815,6 @@ define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@ -876,7 +830,6 @@ define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@ -892,7 +845,6 @@ define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@ -1023,7 +975,6 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@ -1039,7 +990,6 @@ define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@ -1056,7 +1006,6 @@ define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@ -1072,7 +1021,6 @@ define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@ -1090,7 +1038,6 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@ -1106,7 +1053,6 @@ define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@ -1122,7 +1068,6 @@ define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@ -1138,7 +1083,6 @@ define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@ -1270,7 +1214,6 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@ -1286,7 +1229,6 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@ -1304,7 +1246,6 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@ -1320,7 +1261,6 @@ define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@ -1338,7 +1278,6 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@ -1354,7 +1293,6 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@ -1370,7 +1308,6 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@ -1386,7 +1323,6 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@ -1402,7 +1338,6 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredsum.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@ -1418,7 +1353,6 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmaxu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@ -1436,7 +1370,6 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@ -1452,7 +1385,6 @@ define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredminu.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@ -1470,7 +1402,6 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@ -1486,7 +1417,6 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, -1
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredand.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@ -1502,7 +1432,6 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@ -1518,7 +1447,6 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredxor.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)


@ -0,0 +1,354 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
--- |
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
; Function Attrs: nounwind
define <vscale x 1 x i64> @add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
ret <vscale x 1 x i64> %a
}
; Function Attrs: nounwind
define <vscale x 1 x i64> @load_add(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
%b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
ret <vscale x 1 x i64> %b
}
; Function Attrs: nounwind
define <vscale x 1 x i64> @load_zext(<vscale x 1 x i32>* %0, i64 %1) #0 {
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %1)
%b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %1)
ret <vscale x 1 x i64> %b
}
; Function Attrs: nounwind readnone
declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1
; Function Attrs: nounwind
define i64 @vmv_x_s(<vscale x 1 x i64> %0) #0 {
entry:
%a = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %0)
ret i64 %a
}
define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #2 {
%a = load <2 x i64>, <2 x i64>* %x, align 16
%b = load <2 x i64>, <2 x i64>* %y, align 16
%c = add <2 x i64> %a, %b
store <2 x i64> %c, <2 x i64>* %x, align 16
ret void
}
; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #3
define i64 @vreduce_add_v2i64(<2 x i64>* %x) #2 {
%v = load <2 x i64>, <2 x i64>* %x, align 16
%red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
ret i64 %red
}
; Function Attrs: nounwind
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #0
; Function Attrs: nounwind
define <vscale x 1 x i64> @vsetvli_add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
entry:
%a = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %a)
ret <vscale x 1 x i64> %b
}
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
; Function Attrs: nounwind readonly
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #2
; Function Attrs: nounwind readonly
declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #2
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
attributes #0 = { nounwind "target-features"="+experimental-v" }
attributes #1 = { nounwind readnone "target-features"="+experimental-v" }
attributes #2 = { "target-features"="+experimental-v" }
attributes #3 = { nofree nosync nounwind readnone willreturn "target-features"="+experimental-v" }
attributes #4 = { nounwind readonly "target-features"="+experimental-v" }
...
---
name: add
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: vr }
- { id: 2, class: gpr }
- { id: 3, class: vr }
liveins:
- { reg: '$v8', virtual-reg: '%0' }
- { reg: '$v9', virtual-reg: '%1' }
- { reg: '$x10', virtual-reg: '%2' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $v8, $v9, $x10
; CHECK-LABEL: name: add
; CHECK: liveins: $v8, $v9, $x10
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK: PseudoRET implicit $v8
%2:gpr = COPY $x10
%1:vr = COPY $v9
%0:vr = COPY $v8
%3:vr = PseudoVADD_VV_M1 %0, %1, %2, 6
$v8 = COPY %3
PseudoRET implicit $v8
...
---
name: load_add
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: vr }
- { id: 2, class: gpr }
- { id: 3, class: vr }
- { id: 4, class: vr }
liveins:
- { reg: '$x10', virtual-reg: '%0' }
- { reg: '$v8', virtual-reg: '%1' }
- { reg: '$x11', virtual-reg: '%2' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $x10, $v8, $x11
; CHECK-LABEL: name: load_add
; CHECK: liveins: $x10, $v8, $x11
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8
; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK: PseudoRET implicit $v8
%2:gpr = COPY $x11
%1:vr = COPY $v8
%0:gpr = COPY $x10
%3:vr = PseudoVLE64_V_M1 %0, %2, 6
%4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6
$v8 = COPY %4
PseudoRET implicit $v8
...
---
name: load_zext
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
- { id: 2, class: vr }
- { id: 3, class: vr }
liveins:
- { reg: '$x10', virtual-reg: '%0' }
- { reg: '$x11', virtual-reg: '%1' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $x10, $x11
; CHECK-LABEL: name: load_zext
; CHECK: liveins: $x10, $x11
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5, implicit $vl, implicit $vtype
; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: $v8 = COPY %3
; CHECK: PseudoRET implicit $v8
%1:gpr = COPY $x11
%0:gpr = COPY $x10
%2:vr = PseudoVLE32_V_MF2 %0, %1, 5
early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed %2, %1, 6
$v8 = COPY %3
PseudoRET implicit $v8
...
---
name: vmv_x_s
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: gpr }
liveins:
- { reg: '$v8', virtual-reg: '%0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $v8
; CHECK-LABEL: name: vmv_x_s
; CHECK: liveins: $v8
; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8
; CHECK: dead $x0 = PseudoVSETVLI killed $x0, 88, implicit-def $vl, implicit-def $vtype, implicit $vl
; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[COPY]], 6, implicit $vtype
; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
; CHECK: PseudoRET implicit $x10
%0:vr = COPY $v8
%1:gpr = PseudoVMV_X_S_M1 %0, 6
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: add_v2i64
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
- { id: 2, class: vr }
- { id: 3, class: vr }
- { id: 4, class: vr }
liveins:
- { reg: '$x10', virtual-reg: '%0' }
- { reg: '$x11', virtual-reg: '%1' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x10, $x11
; CHECK-LABEL: name: add_v2i64
; CHECK: liveins: $x10, $x11
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %5:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x)
; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.y)
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6, implicit $vl, implicit $vtype
; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store 16 into %ir.x)
; CHECK: PseudoRET
%1:gpr = COPY $x11
%0:gpr = COPY $x10
%2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x)
%3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load 16 from %ir.y)
%4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6
PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store 16 into %ir.x)
PseudoRET
...
---
name: vreduce_add_v2i64
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: vr }
- { id: 2, class: vr }
- { id: 3, class: vr }
- { id: 4, class: vr }
- { id: 5, class: gpr }
liveins:
- { reg: '$x10', virtual-reg: '%0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $x10
; CHECK-LABEL: name: vreduce_add_v2i64
; CHECK: liveins: $x10
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %6:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x)
; CHECK: dead %7:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, $noreg, 6, implicit $vl, implicit $vtype
; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK: dead %8:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVREDSUM_VS_M1_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6, implicit $vl, implicit $vtype
; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 killed [[PseudoVREDSUM_VS_M1_]], 6, implicit $vtype
; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
; CHECK: PseudoRET implicit $x10
%0:gpr = COPY $x10
%1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x)
%2:vr = PseudoVMV_V_I_M1 0, $x0, 6
%4:vr = IMPLICIT_DEF
%3:vr = PseudoVREDSUM_VS_M1 %4, killed %1, killed %2, 2, 6
%5:gpr = PseudoVMV_X_S_M1 killed %3, 6
$x10 = COPY %5
PseudoRET implicit $x10
...
---
name: vsetvli_add
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: vr }
- { id: 2, class: gpr }
- { id: 3, class: gpr }
- { id: 4, class: vr }
liveins:
- { reg: '$v8', virtual-reg: '%0' }
- { reg: '$v9', virtual-reg: '%1' }
- { reg: '$x10', virtual-reg: '%2' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $v8, $v9, $x10
; CHECK-LABEL: name: vsetvli_add
; CHECK: liveins: $v8, $v9, $x10
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK: PseudoRET implicit $v8
%2:gpr = COPY $x10
%1:vr = COPY $v9
%0:vr = COPY $v8
%3:gpr = PseudoVSETVLI %2, 88, implicit-def dead $vl, implicit-def dead $vtype
%4:vr = PseudoVADD_VV_M1 %0, %1, killed %3, 6
$v8 = COPY %4
PseudoRET implicit $v8
...
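
This new MIR test drives the pass directly, and its RUN line above already spells out the invocation. To try it by hand on a local copy (the file name vsetvli-insert.mir is an assumption — it is not shown in this view), the command is roughly the following, with the CHECK lines regenerated by utils/update_mir_test_checks.py as the NOTE at the top of the file says:

  llc vsetvli-insert.mir -o - -mtriple=riscv64 -mattr=experimental-v \
    -run-pass=riscv-insert-vsetvli | FileCheck vsetvli-insert.mir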


@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
--- |
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
@ -41,7 +41,7 @@ body: |
; CHECK: PseudoRET
%0:gpr = COPY $x10
%1:gpr = COPY $x11
$v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype
$v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6
PseudoVSPILL7_M1 killed renamable $v0_v1_v2_v3_v4_v5_v6, %stack.0, $x0
renamable $v7_v8_v9_v10_v11_v12_v13 = PseudoVRELOAD7_M1 %stack.0, $x0
VS1R_V killed $v8, %0:gpr