
[Target][ARM] Adding MVE VPT Optimisation Pass

Differential Revision: https://reviews.llvm.org/D76709
Pierre-vh 2020-03-18 15:23:17 +00:00
parent 9d34186bb4
commit 5ddbe3862b
7 changed files with 1109 additions and 0 deletions
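
In short, the new pass rewrites a VCMP whose result is the exact inverse of the
preceding VCMP's result into a VPNOT of that result. A minimal before/after
sketch in MIR, modeled on the MIR tests below (ARMCC condition code 10 is GE,
11 is LT):

    ; Before:
    %3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
    %4:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 11, 0, $noreg

    ; After:
    %3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
    %4:vccr = MVE_VPNOT %3:vccr, 0, $noreg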


@@ -47,6 +47,7 @@ FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createMVEVPTBlockPass();
FunctionPass *createMVEVPTOptimisationsPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor = nullptr);
@@ -66,6 +67,7 @@ void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
void initializeMVEVPTOptimisationsPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);


@@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
  initializeARMExpandPseudoPass(Registry);
  initializeThumb2SizeReducePass(Registry);
  initializeMVEVPTBlockPass(Registry);
  initializeMVEVPTOptimisationsPass(Registry);
  initializeMVETailPredicationPass(Registry);
  initializeARMLowOverheadLoopsPass(Registry);
  initializeMVEGatherScatterLoweringPass(Registry);
@@ -487,6 +488,8 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(createMVEVPTOptimisationsPass());

    addPass(createMLxExpansionPass());

    if (EnableARMLoadStoreOpt)


@@ -54,6 +54,7 @@ add_llvm_target(ARMCodeGen
  MVEGatherScatterLowering.cpp
  MVETailPredication.cpp
  MVEVPTBlockPass.cpp
  MVEVPTOptimisationsPass.cpp
  Thumb1FrameLowering.cpp
  Thumb1InstrInfo.cpp
  ThumbRegisterInfo.cpp


@@ -0,0 +1,232 @@
//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass does a few optimisations related to MVE VPT blocks before
/// register allocation is performed. The goal is to maximize the sizes of the
/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
/// after register allocation). Currently, this pass replaces VCMPs with VPNOTs
/// when possible, so the Block Insertion pass can delete them later to create
/// larger VPT blocks.
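///
/// For example (an illustrative sketch based on the vpte_block test below),
/// once the second VCMP has become a VPNOT, the Block Insertion pass can fold
/// it away as the "else" of a single VPT block:
///
///   vcmp.s32 ge, q0, q2            vpte.s32 ge, q0, q2
///   vpst                           vorrt q0, q1, q2
///   vorrt q0, q1, q2      -->      vmove q0, q2
///   vpnot
///   vpst
///   vmovt q0, q2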
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt-opts"
namespace {
class MVEVPTOptimisations : public MachineFunctionPass {
public:
  static char ID;
  const Thumb2InstrInfo *TII;
  MachineRegisterInfo *MRI;

  MVEVPTOptimisations() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override {
    return "ARM MVE VPT Optimisation Pass";
  }

private:
  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
};

char MVEVPTOptimisations::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
                "ARM MVE VPT Optimisations pass", false, false)

// Returns true if Opcode is any VCMP Opcode.
static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }

// Returns true if a VCMP with this Opcode can have its operands swapped.
// There are two kinds of VCMP that can't have their operands swapped: float
// VCMPs, and VCMPr instructions (since the scalar operand is always on the
// right).
static bool CanHaveSwappedOperands(unsigned Opcode) {
  switch (Opcode) {
  default:
    return true;
  case ARM::MVE_VCMPf32:
  case ARM::MVE_VCMPf16:
  case ARM::MVE_VCMPf32r:
  case ARM::MVE_VCMPf16r:
  case ARM::MVE_VCMPi8r:
  case ARM::MVE_VCMPi16r:
  case ARM::MVE_VCMPi32r:
  case ARM::MVE_VCMPu8r:
  case ARM::MVE_VCMPu16r:
  case ARM::MVE_VCMPu32r:
  case ARM::MVE_VCMPs8r:
  case ARM::MVE_VCMPs16r:
  case ARM::MVE_VCMPs32r:
    return false;
  }
}
// Returns the CondCode of a VCMP Instruction.
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
}

// Returns true if Cond is equivalent to a VPNOT instruction on the result of
// Prev. Cond and Prev must be VCMPs.
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));

  // Opcodes must match.
  if (Cond.getOpcode() != Prev.getOpcode())
    return false;

  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);

  // If the VCMP has the opposite condition with the same operands, we can
  // replace it with a VPNOT.
  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
  if (ExpectedCode == GetCondCode(Prev))
    if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
      return true;

  // Check again with operands swapped if possible.
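  // For example (illustrative; cf. the
  // vcmp_with_opposite_cond_and_swapped_operands MIR test below): if Prev is
  // "VCMP ge %a, %b" and Cond is "VCMP gt %b, %a", then Cond computes
  // (%b > %a) == (%a < %b) == !(%a >= %b), i.e. exactly the VPNOT of Prev's
  // result.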
  if (!CanHaveSwappedOperands(Cond.getOpcode()))
    return false;
  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
         CondOP2.isIdenticalTo(PrevOP1);
}
// Returns true if Instr writes to VCCR.
static bool IsWritingToVCCR(MachineInstr &Instr) {
  if (Instr.getNumOperands() == 0)
    return false;
  MachineOperand &Dst = Instr.getOperand(0);
  if (!Dst.isReg())
    return false;
  Register DstReg = Dst.getReg();
  if (!DstReg.isVirtual())
    return false;
  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
}
// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or
  // when a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;
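  // For example (an illustrative sketch mirroring the killed_vccr_values MIR
  // test below):
  //   %2:vccr = MVE_VCMPf16 %0, %1, 10, 0, $noreg
  //   %3:mqpr = MVE_VORR %0, %1, 1, killed %2, undef %3
  //   %4:vccr = MVE_VCMPf16 %0, %1, 11, 0, $noreg
  // %4 becomes "MVE_VPNOT %2", so the kill flag on %2 in the VORR has to be
  // cleared.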
  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO = Instr.findRegisterUseOperand(
              PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
        // If we come across the instruction that kills PrevVCMP's result,
        // record it so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs.
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);

    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses and kills PrevVCMP's result,
    // remove the kill flag.
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->removeFromParent();

  return !DeadInstructions.empty();
}
bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &STI =
      static_cast<const ARMSubtarget &>(Fn.getSubtarget());

  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  MRI = &Fn.getRegInfo();

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  bool Modified = false;
  for (MachineBasicBlock &MBB : Fn)
    Modified |= ReplaceVCMPsByVPNOTs(MBB);

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return Modified;
}

/// createMVEVPTOptimisationsPass
FunctionPass *llvm::createMVEVPTOptimisationsPass() {
  return new MVEVPTOptimisations();
}


@@ -92,6 +92,7 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MVE VPT Optimisation Pass
; CHECK-NEXT: ARM MLA / MLS expansion pass
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
; CHECK-NEXT: ARM A15 S->D optimizer


@@ -0,0 +1,323 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpt_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
ret <4 x i32> %1
}
define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptt_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q3, q0
; CHECK-NEXT: vptt.s32 ge, q0, q2
; CHECK-NEXT: vorrt q3, q1, q2
; CHECK-NEXT: vorrt q0, q3, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
ret <4 x i32> %2
}
define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttt_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpttt.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
ret <4 x i32> %3
}
define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptttt_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vptttt.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
ret <4 x i32> %4
}
define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpte_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpte.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
ret <4 x i32> %3
}
define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptte_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vptte.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorre q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
ret <4 x i32> %4
}
define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptee_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vptee.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vorre q0, q1, q2
; CHECK-NEXT: vorre q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
ret <4 x i32> %4
}
define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptet_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vcmp.s32 ge, q0, q2
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
ret <4 x i32> %4
}
define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttet_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vcmp.s32 ge, q0, q2
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vpstt
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptett_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vcmp.s32 ge, q0, q2
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpstt
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpteet_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vcmp.s32 ge, q0, q2
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpnot
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpteee_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpteee.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptete_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vcmp.s32 ge, q0, q2
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpnot
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttte_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpttte.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
ret <4 x i32> %5
}
define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttee_block:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpttee.s32 ge, q0, q2
; CHECK-NEXT: vorrt q0, q1, q2
; CHECK-NEXT: vmovt q0, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: vmove q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = icmp sge <4 x i32> %a, %c
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
ret <4 x i32> %5
}


@@ -0,0 +1,547 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
; Functions are intentionally left blank - see the MIR sequences below.
define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond_and_swapped_operands(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @triple_vcmp(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @killed_vccr_values(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @predicated_vcmps(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @flt_with_swapped_operands(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @different_opcodes(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @incorrect_condcode(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
define arm_aapcs_vfpcc <4 x float> @vpr_or_vccr_write_between_vcmps(<4 x float> %inactive1) #0 {
entry:
ret <4 x float> %inactive1
}
attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
...
---
name: vcmp_with_opposite_cond
alignment: 4
body: |
; CHECK-LABEL: name: vcmp_with_opposite_cond
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32_]], 0, $noreg
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
; CHECK: bb.5:
; CHECK: successors: %bb.6(0x80000000)
; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
; CHECK: bb.6:
; CHECK: successors: %bb.7(0x80000000)
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
; CHECK: bb.7:
; CHECK: successors: %bb.8(0x80000000)
; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
; CHECK: bb.8:
; CHECK: successors: %bb.9(0x80000000)
; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
; CHECK: bb.9:
; CHECK: successors: %bb.10(0x80000000)
; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT9:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
; CHECK: bb.10:
; CHECK: successors: %bb.11(0x80000000)
; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT10:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
; CHECK: bb.11:
; CHECK: successors: %bb.12(0x80000000)
; CHECK: [[MVE_VCMPf16r:%[0-9]+]]:vccr = MVE_VCMPf16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT11:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16r]], 0, $noreg
; CHECK: bb.12:
; CHECK: successors: %bb.13(0x80000000)
; CHECK: [[MVE_VCMPf32r:%[0-9]+]]:vccr = MVE_VCMPf32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT12:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32r]], 0, $noreg
; CHECK: bb.13:
; CHECK: successors: %bb.14(0x80000000)
; CHECK: [[MVE_VCMPi16r:%[0-9]+]]:vccr = MVE_VCMPi16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT13:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16r]], 0, $noreg
; CHECK: bb.14:
; CHECK: successors: %bb.15(0x80000000)
; CHECK: [[MVE_VCMPi32r:%[0-9]+]]:vccr = MVE_VCMPi32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT14:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32r]], 0, $noreg
; CHECK: bb.15:
; CHECK: successors: %bb.16(0x80000000)
; CHECK: [[MVE_VCMPi8r:%[0-9]+]]:vccr = MVE_VCMPi8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT15:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8r]], 0, $noreg
; CHECK: bb.16:
; CHECK: successors: %bb.17(0x80000000)
; CHECK: [[MVE_VCMPs16r:%[0-9]+]]:vccr = MVE_VCMPs16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT16:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16r]], 0, $noreg
; CHECK: bb.17:
; CHECK: successors: %bb.18(0x80000000)
; CHECK: [[MVE_VCMPs32r:%[0-9]+]]:vccr = MVE_VCMPs32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT17:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32r]], 0, $noreg
; CHECK: bb.18:
; CHECK: successors: %bb.19(0x80000000)
; CHECK: [[MVE_VCMPs8r:%[0-9]+]]:vccr = MVE_VCMPs8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT18:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8r]], 0, $noreg
; CHECK: bb.19:
; CHECK: successors: %bb.20(0x80000000)
; CHECK: [[MVE_VCMPu16r:%[0-9]+]]:vccr = MVE_VCMPu16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT19:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16r]], 0, $noreg
; CHECK: bb.20:
; CHECK: successors: %bb.21(0x80000000)
; CHECK: [[MVE_VCMPu32r:%[0-9]+]]:vccr = MVE_VCMPu32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT20:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32r]], 0, $noreg
; CHECK: bb.21:
; CHECK: successors: %bb.22(0x80000000)
; CHECK: [[MVE_VCMPu8r:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT21:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r]], 0, $noreg
; CHECK: bb.22:
; CHECK: [[MVE_VCMPu8r1:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, $zr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT22:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r1]], 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
;
; Tests that VCMPs with an opposite condition are correctly converted into VPNOTs.
;
bb.0:
%3:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.1:
%5:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%6:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.2:
%7:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%8:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.3:
%9:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%10:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.4:
%11:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%12:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.5:
%13:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%14:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.6:
%15:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%16:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.7:
%17:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%18:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.8:
%19:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%20:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.9:
%21:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%22:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.10:
%23:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%24:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 11, 0, $noreg
bb.11:
%25:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%26:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.12:
%27:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%28:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.13:
%29:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%30:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.14:
%31:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%32:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.15:
%33:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%34:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.16:
%35:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%36:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.17:
%37:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%38:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.18:
%39:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%40:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.19:
%41:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%42:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.20:
%43:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%44:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.21:
%45:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
%46:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
bb.22:
; There shouldn't be any exception for $zr, so the second VCMP should
; be transformed into a VPNOT.
%47:vccr = MVE_VCMPu8r %0:mqpr, $zr, 10, 0, $noreg
%48:vccr = MVE_VCMPu8r %0:mqpr, $zr, 11, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: vcmp_with_opposite_cond_and_swapped_operands
alignment: 4
body: |
; CHECK-LABEL: name: vcmp_with_opposite_cond_and_swapped_operands
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
; CHECK: bb.5:
; CHECK: successors: %bb.6(0x80000000)
; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
; CHECK: bb.6:
; CHECK: successors: %bb.7(0x80000000)
; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
; CHECK: bb.7:
; CHECK: successors: %bb.8(0x80000000)
; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
; CHECK: bb.8:
; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
;
; Tests that VCMPs with an opposite condition and swapped operands are
; correctly converted into VPNOTs.
;
bb.0:
%2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.1:
%4:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%5:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.2:
%6:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%7:vccr = MVE_VCMPi8 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.3:
%8:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%9:vccr = MVE_VCMPs16 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.4:
%10:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%11:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.5:
%12:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%13:vccr = MVE_VCMPs8 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.6:
%14:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%15:vccr = MVE_VCMPu16 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.7:
%16:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%17:vccr = MVE_VCMPu32 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.8:
%18:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
%19:vccr = MVE_VCMPu8 %1:mqpr, %0:mqpr, 12, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: triple_vcmp
alignment: 4
body: |
;
; Tests that, when there are two "VPNOT-like VCMPs" in a row, only the first
; of them becomes a VPNOT.
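; (After a replacement, the pass resets its "previous VCMP" state, so the
; third VCMP has no prior result left to invert.)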
;
bb.0:
; CHECK-LABEL: name: triple_vcmp
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 12, 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
%4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: killed_vccr_values
alignment: 4
body: |
bb.0:
;
; Tests that, if the result of the VCMP is killed before the
; second VCMP (that will be converted into a VPNOT) is found,
; the kill flag is removed.
;
; CHECK-LABEL: name: killed_vccr_values
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR %1:mqpr, %2:mqpr, 1, [[MVE_VCMPf16_]], undef [[MVE_VORR]]
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:mqpr = MVE_VORR %0:mqpr, %1:mqpr, 1, killed %2:vccr, undef %3:mqpr
%4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: predicated_vcmps
alignment: 4
body: |
; CHECK-LABEL: name: predicated_vcmps
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPi16_1:%[0-9]+]]:vccr = MVE_VCMPi16 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi16_]]
; CHECK: [[MVE_VCMPi16_2:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_]]
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPi32_1:%[0-9]+]]:vccr = MVE_VCMPi32 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi32_]]
; CHECK: [[MVE_VCMPi32_2:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_]]
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf16_]]
; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf16_]]
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf32_]]
; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf32_]]
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: [[MVE_VCMPi16_3:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPi16_4:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi16_3]]
; CHECK: [[MVE_VCMPi16_5:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_3]]
; CHECK: bb.5:
; CHECK: [[MVE_VCMPi32_3:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPi32_4:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi32_3]]
; CHECK: [[MVE_VCMPi32_5:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_3]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
;
; Tests that predicated VCMPs are not replaced.
;
bb.0:
%2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 1, %2:vccr
%4:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %2:vccr
bb.1:
%5:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%6:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 1, %5:vccr
%7:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %5:vccr
bb.2:
%8:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%9:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 1, %8:vccr
%10:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 1, %8:vccr
bb.3:
%11:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%12:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 1, %11:vccr
%13:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 1, %11:vccr
bb.4:
%14:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%15:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 1, %14:vccr
%16:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %14:vccr
bb.5:
%17:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%18:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 1, %17:vccr
%19:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %17:vccr
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: flt_with_swapped_operands
alignment: 4
body: |
; CHECK-LABEL: name: flt_with_swapped_operands
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 12, 0, $noreg
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 12, 0, $noreg
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf16_3:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 11, 0, $noreg
; CHECK: bb.3:
; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPf32_3:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 11, 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
;
; Tests that float VCMPs with an opposite condition and swapped operands
; are not transformed into VPNOTs.
;
bb.0:
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.1:
%4:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%5:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 12, 0, $noreg
bb.2:
%6:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
%7:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 11, 0, $noreg
bb.3:
%8:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%9:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 11, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: different_opcodes
alignment: 4
body: |
;
; Tests that a "VPNOT-like VCMP" with an opcode different from the previous VCMP
; is not transformed into a VPNOT.
;
bb.0:
; CHECK-LABEL: name: different_opcodes
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 0, 0, $noreg
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 1, 1, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 0, 0, $noreg
%3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 1, 1, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: incorrect_condcode
alignment: 4
body: |
; CHECK-LABEL: name: incorrect_condcode
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 11, 0, $noreg
; CHECK: bb.1:
; CHECK: [[MVE_VCMPs32_2:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
; CHECK: [[MVE_VCMPs32_3:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
;
; Tests that a VCMP is not transformed into a VPNOT if its CondCode is not
; the opposite of the previous VCMP's CondCode.
;
bb.0:
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 11, 0, $noreg
bb.1:
%4:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
%5:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...
---
name: vpr_or_vccr_write_between_vcmps
alignment: 4
body: |
;
; Tests that a "VPNOT-like VCMP" will not be transformed into a VPNOT if
; VCCR/VPR is written to in-between.
;
bb.0:
; CHECK-LABEL: name: vpr_or_vccr_write_between_vcmps
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT killed [[MVE_VCMPs32_]], 0, $noreg
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 10, 0, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
%3:vccr = MVE_VPNOT killed %2:vccr, 0, $noreg
%4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 10, 0, $noreg
tBX_RET 14, $noreg, implicit %0:mqpr
...