mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[Target][ARM] Adding MVE VPT Optimisation Pass
Differential Revision: https://reviews.llvm.org/D76709
This commit is contained in:
parent
9d34186bb4
commit
5ddbe3862b
@ -47,6 +47,7 @@ FunctionPass *createARMConstantIslandPass();
|
||||
FunctionPass *createMLxExpansionPass();
|
||||
FunctionPass *createThumb2ITBlockPass();
|
||||
FunctionPass *createMVEVPTBlockPass();
|
||||
FunctionPass *createMVEVPTOptimisationsPass();
|
||||
FunctionPass *createARMOptimizeBarriersPass();
|
||||
FunctionPass *createThumb2SizeReductionPass(
|
||||
std::function<bool(const Function &)> Ftor = nullptr);
|
||||
@ -66,6 +67,7 @@ void initializeARMExpandPseudoPass(PassRegistry &);
|
||||
void initializeThumb2SizeReducePass(PassRegistry &);
|
||||
void initializeThumb2ITBlockPass(PassRegistry &);
|
||||
void initializeMVEVPTBlockPass(PassRegistry &);
|
||||
void initializeMVEVPTOptimisationsPass(PassRegistry &);
|
||||
void initializeARMLowOverheadLoopsPass(PassRegistry &);
|
||||
void initializeMVETailPredicationPass(PassRegistry &);
|
||||
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
|
||||
|
@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
|
||||
initializeARMExpandPseudoPass(Registry);
|
||||
initializeThumb2SizeReducePass(Registry);
|
||||
initializeMVEVPTBlockPass(Registry);
|
||||
initializeMVEVPTOptimisationsPass(Registry);
|
||||
initializeMVETailPredicationPass(Registry);
|
||||
initializeARMLowOverheadLoopsPass(Registry);
|
||||
initializeMVEGatherScatterLoweringPass(Registry);
|
||||
@ -487,6 +488,8 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
|
||||
|
||||
void ARMPassConfig::addPreRegAlloc() {
|
||||
if (getOptLevel() != CodeGenOpt::None) {
|
||||
addPass(createMVEVPTOptimisationsPass());
|
||||
|
||||
addPass(createMLxExpansionPass());
|
||||
|
||||
if (EnableARMLoadStoreOpt)
|
||||
|
@ -54,6 +54,7 @@ add_llvm_target(ARMCodeGen
|
||||
MVEGatherScatterLowering.cpp
|
||||
MVETailPredication.cpp
|
||||
MVEVPTBlockPass.cpp
|
||||
MVEVPTOptimisationsPass.cpp
|
||||
Thumb1FrameLowering.cpp
|
||||
Thumb1InstrInfo.cpp
|
||||
ThumbRegisterInfo.cpp
|
||||
|
232
lib/Target/ARM/MVEVPTOptimisationsPass.cpp
Normal file
232
lib/Target/ARM/MVEVPTOptimisationsPass.cpp
Normal file
@ -0,0 +1,232 @@
|
||||
//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file This pass does a few optimisations related to MVE VPT blocks before
|
||||
/// register allocation is performed. The goal is to maximize the sizes of the
|
||||
/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
|
||||
/// after register allocation). Currently, this pass replaces VCMPs with VPNOTs
|
||||
/// when possible, so the Block Insertion pass can delete them later to create
|
||||
/// larger VPT blocks.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM.h"
|
||||
#include "ARMSubtarget.h"
|
||||
#include "MCTargetDesc/ARMBaseInfo.h"
|
||||
#include "Thumb2InstrInfo.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <cassert>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "arm-mve-vpt-opts"
|
||||
|
||||
namespace {
/// Pre-regalloc MachineFunction pass that performs optimisations related to
/// MVE VPT blocks. Currently its only transformation is replacing a VCMP whose
/// result is the logical negation of a preceding VCMP's result with a VPNOT,
/// which lets the (post-RA) MVE VPT Block Insertion pass fold the VPNOT away
/// and build larger VPT blocks.
class MVEVPTOptimisations : public MachineFunctionPass {
public:
  static char ID;
  // Both set up in runOnMachineFunction before any transformation runs.
  const Thumb2InstrInfo *TII;
  MachineRegisterInfo *MRI;

  MVEVPTOptimisations() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override {
    return "ARM MVE VPT Optimisation Pass";
  }

private:
  // Replaces redundant VCMPs in MBB with VPNOTs of the previous VCMP's
  // result. Returns true if MBB was modified.
  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
};

char MVEVPTOptimisations::ID = 0;

} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
                "ARM MVE VPT Optimisations pass", false, false)

// Returns true if Opcode is any VCMP Opcode (VCMPOpcodeToVPT returns 0 for
// every non-VCMP opcode).
static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
|
||||
|
||||
// Returns true if a VCMP with this Opcode can have its operands swapped.
// There are 2 kinds of VCMP that can't have their operands swapped: Float
// VCMPs (swapping the operands of a float comparison is not simply a matter
// of swapping the condition code), and VCMPr instructions (since the r
// operand is always on the right).
static bool CanHaveSwappedOperands(unsigned Opcode) {
  switch (Opcode) {
  default:
    return true;
  case ARM::MVE_VCMPf32:
  case ARM::MVE_VCMPf16:
  case ARM::MVE_VCMPf32r:
  case ARM::MVE_VCMPf16r:
  case ARM::MVE_VCMPi8r:
  case ARM::MVE_VCMPi16r:
  case ARM::MVE_VCMPi32r:
  case ARM::MVE_VCMPu8r:
  case ARM::MVE_VCMPu16r:
  case ARM::MVE_VCMPu32r:
  case ARM::MVE_VCMPs8r:
  case ARM::MVE_VCMPs16r:
  case ARM::MVE_VCMPs32r:
    return false;
  }
}
|
||||
|
||||
// Returns the CondCode of a VCMP Instruction.
// For all MVE VCMP forms, the condition code immediate is operand 3
// (operand 0 is the destination, operands 1 and 2 are the compared values).
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
}
|
||||
|
||||
// Returns true if Cond is equivalent to a VPNOT instruction on the result of
// Prev. Cond and Prev must be VCMPs.
//
// That is the case when Cond compares the same operands as Prev but with the
// opposite condition — either directly, or (for swappable integer VCMPs) with
// the operands exchanged and the condition both negated and swapped.
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));

  // Opcodes must match.
  if (Cond.getOpcode() != Prev.getOpcode())
    return false;

  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);

  // If the VCMP has the opposite condition with the same operands, we can
  // replace it with a VPNOT
  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
  if (ExpectedCode == GetCondCode(Prev))
    if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
      return true;
  // Check again with operands swapped if possible
  if (!CanHaveSwappedOperands(Cond.getOpcode()))
    return false;
  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
         CondOP2.isIdenticalTo(PrevOP1);
}
|
||||
|
||||
// Returns true if Instr writes to VCCR.
|
||||
static bool IsWritingToVCCR(MachineInstr &Instr) {
|
||||
if (Instr.getNumOperands() == 0)
|
||||
return false;
|
||||
MachineOperand &Dst = Instr.getOperand(0);
|
||||
if (!Dst.isReg())
|
||||
return false;
|
||||
Register DstReg = Dst.getReg();
|
||||
if (!DstReg.isVirtual())
|
||||
return false;
|
||||
MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
|
||||
const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
|
||||
return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
|
||||
}
|
||||
|
||||
// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
|
||||
bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
|
||||
SmallVector<MachineInstr *, 4> DeadInstructions;
|
||||
|
||||
// The last VCMP that we have seen and that couldn't be replaced.
|
||||
// This is reset when an instruction that writes to VCCR/VPR is found, or when
|
||||
// a VCMP is replaced with a VPNOT.
|
||||
// We'll only replace VCMPs with VPNOTs when this is not null, and when the
|
||||
// current VCMP is the opposite of PrevVCMP.
|
||||
MachineInstr *PrevVCMP = nullptr;
|
||||
// If we find an instruction that kills the result of PrevVCMP, we save the
|
||||
// operand here to remove the kill flag in case we need to use PrevVCMP's
|
||||
// result.
|
||||
MachineOperand *PrevVCMPResultKiller = nullptr;
|
||||
|
||||
for (MachineInstr &Instr : MBB.instrs()) {
|
||||
if (PrevVCMP) {
|
||||
if (MachineOperand *MO = Instr.findRegisterUseOperand(
|
||||
PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
|
||||
// If we come accross the instr that kills PrevVCMP's result, record it
|
||||
// so we can remove the kill flag later if we need to.
|
||||
PrevVCMPResultKiller = MO;
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore predicated instructions.
|
||||
if (getVPTInstrPredicate(Instr) != ARMVCC::None)
|
||||
continue;
|
||||
|
||||
// Only look at VCMPs
|
||||
if (!IsVCMP(Instr.getOpcode())) {
|
||||
// If the instruction writes to VCCR, forget the previous VCMP.
|
||||
if (IsWritingToVCCR(Instr))
|
||||
PrevVCMP = nullptr;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
|
||||
PrevVCMP = &Instr;
|
||||
continue;
|
||||
}
|
||||
|
||||
// The register containing the result of the VCMP that we're going to
|
||||
// replace.
|
||||
Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
|
||||
|
||||
// Build a VPNOT to replace the VCMP, reusing its operands.
|
||||
MachineInstrBuilder MIBuilder =
|
||||
BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
|
||||
.add(Instr.getOperand(0))
|
||||
.addReg(PrevVCMPResultReg);
|
||||
addUnpredicatedMveVpredNOp(MIBuilder);
|
||||
LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
|
||||
MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
|
||||
Instr.dump());
|
||||
|
||||
// If we found an instruction that uses, and kills PrevVCMP's result,
|
||||
// remove the kill flag.
|
||||
if (PrevVCMPResultKiller)
|
||||
PrevVCMPResultKiller->setIsKill(false);
|
||||
|
||||
// Finally, mark the old VCMP for removal and reset
|
||||
// PrevVCMP/PrevVCMPResultKiller.
|
||||
DeadInstructions.push_back(&Instr);
|
||||
PrevVCMP = nullptr;
|
||||
PrevVCMPResultKiller = nullptr;
|
||||
}
|
||||
|
||||
for (MachineInstr *DeadInstruction : DeadInstructions)
|
||||
DeadInstruction->removeFromParent();
|
||||
|
||||
return !DeadInstructions.empty();
|
||||
}
|
||||
|
||||
/// Entry point: runs ReplaceVCMPsByVPNOTs over every basic block of Fn.
/// Does nothing (and reports no change) unless the subtarget is a Thumb2
/// target with MVE integer instructions.
bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
  const auto &Subtarget = static_cast<const ARMSubtarget &>(Fn.getSubtarget());

  if (!Subtarget.isThumb2() || !Subtarget.hasMVEIntegerOps())
    return false;

  MRI = &Fn.getRegInfo();
  TII = static_cast<const Thumb2InstrInfo *>(Subtarget.getInstrInfo());

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn)
    Changed |= ReplaceVCMPsByVPNOTs(MBB);

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return Changed;
}
|
||||
|
||||
/// createMVEVPTOptimisationsPass - Returns a freshly-allocated instance of
/// the MVE VPT Optimisations pass (caller takes ownership, as is the
/// convention for LLVM pass factory functions).
FunctionPass *llvm::createMVEVPTOptimisationsPass() {
  return new MVEVPTOptimisations();
}
|
@ -92,6 +92,7 @@
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
; CHECK-NEXT: MVE VPT Optimisation Pass
|
||||
; CHECK-NEXT: ARM MLA / MLS expansion pass
|
||||
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
|
||||
; CHECK-NEXT: ARM A15 S->D optimizer
|
||||
|
323
test/CodeGen/Thumb2/mve-vpt-blocks.ll
Normal file
323
test/CodeGen/Thumb2/mve-vpt-blocks.ll
Normal file
@ -0,0 +1,323 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
|
||||
|
||||
declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpt_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpt.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptt_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov q3, q0
|
||||
; CHECK-NEXT: vptt.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q3, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q3, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpttt_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpttt.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptttt_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vptttt.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpte_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpte.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptte_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vptte.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorre q0, q1, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptee_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vptee.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vorre q0, q1, q2
|
||||
; CHECK-NEXT: vorre q0, q1, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptet_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vcmp.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpttet_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vcmp.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpstt
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptett_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vcmp.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpstt
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpteet_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpteee_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpteee.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vptete_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vcmp.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpnot
|
||||
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpttte_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpttte.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; CHECK-LABEL: vpttee_block:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vpttee.s32 ge, q0, q2
|
||||
; CHECK-NEXT: vorrt q0, q1, q2
|
||||
; CHECK-NEXT: vmovt q0, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: vmove q0, q2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = icmp sge <4 x i32> %a, %c
|
||||
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
|
||||
%2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
|
||||
%3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
|
||||
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
|
||||
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
|
||||
ret <4 x i32> %5
|
||||
}
|
547
test/CodeGen/Thumb2/mve-vpt-optimisations.mir
Normal file
547
test/CodeGen/Thumb2/mve-vpt-optimisations.mir
Normal file
@ -0,0 +1,547 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main-arm-none-eabi"
|
||||
|
||||
; Functions are intentionally left blank - see the MIR sequences below.
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond_and_swapped_operands(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @triple_vcmp(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @killed_vccr_values(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @predicated_vcmps(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @flt_with_swapped_operands(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @different_opcodes(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @incorrect_condcode(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @vpr_or_vccr_write_between_vcmps(<4 x float> %inactive1) #0 {
|
||||
entry:
|
||||
ret <4 x float> %inactive1
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
|
||||
...
|
||||
---
|
||||
name: vcmp_with_opposite_cond
|
||||
alignment: 4
|
||||
body: |
|
||||
; CHECK-LABEL: name: vcmp_with_opposite_cond
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32_]], 0, $noreg
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
|
||||
; CHECK: bb.4:
|
||||
; CHECK: successors: %bb.5(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
|
||||
; CHECK: bb.5:
|
||||
; CHECK: successors: %bb.6(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
|
||||
; CHECK: bb.6:
|
||||
; CHECK: successors: %bb.7(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
|
||||
; CHECK: bb.7:
|
||||
; CHECK: successors: %bb.8(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
|
||||
; CHECK: bb.8:
|
||||
; CHECK: successors: %bb.9(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
|
||||
; CHECK: bb.9:
|
||||
; CHECK: successors: %bb.10(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT9:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
|
||||
; CHECK: bb.10:
|
||||
; CHECK: successors: %bb.11(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT10:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
|
||||
; CHECK: bb.11:
|
||||
; CHECK: successors: %bb.12(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf16r:%[0-9]+]]:vccr = MVE_VCMPf16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT11:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16r]], 0, $noreg
|
||||
; CHECK: bb.12:
|
||||
; CHECK: successors: %bb.13(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf32r:%[0-9]+]]:vccr = MVE_VCMPf32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT12:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32r]], 0, $noreg
|
||||
; CHECK: bb.13:
|
||||
; CHECK: successors: %bb.14(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi16r:%[0-9]+]]:vccr = MVE_VCMPi16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT13:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16r]], 0, $noreg
|
||||
; CHECK: bb.14:
|
||||
; CHECK: successors: %bb.15(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi32r:%[0-9]+]]:vccr = MVE_VCMPi32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT14:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32r]], 0, $noreg
|
||||
; CHECK: bb.15:
|
||||
; CHECK: successors: %bb.16(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi8r:%[0-9]+]]:vccr = MVE_VCMPi8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT15:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8r]], 0, $noreg
|
||||
; CHECK: bb.16:
|
||||
; CHECK: successors: %bb.17(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs16r:%[0-9]+]]:vccr = MVE_VCMPs16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT16:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16r]], 0, $noreg
|
||||
; CHECK: bb.17:
|
||||
; CHECK: successors: %bb.18(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs32r:%[0-9]+]]:vccr = MVE_VCMPs32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT17:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32r]], 0, $noreg
|
||||
; CHECK: bb.18:
|
||||
; CHECK: successors: %bb.19(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs8r:%[0-9]+]]:vccr = MVE_VCMPs8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT18:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8r]], 0, $noreg
|
||||
; CHECK: bb.19:
|
||||
; CHECK: successors: %bb.20(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu16r:%[0-9]+]]:vccr = MVE_VCMPu16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT19:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16r]], 0, $noreg
|
||||
; CHECK: bb.20:
|
||||
; CHECK: successors: %bb.21(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu32r:%[0-9]+]]:vccr = MVE_VCMPu32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT20:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32r]], 0, $noreg
|
||||
; CHECK: bb.21:
|
||||
; CHECK: successors: %bb.22(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu8r:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT21:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r]], 0, $noreg
|
||||
; CHECK: bb.22:
|
||||
; CHECK: [[MVE_VCMPu8r1:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, $zr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT22:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r1]], 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
;
|
||||
; Tests that VCMPs with an opposite condition are correctly converted into VPNOTs.
|
||||
;
|
||||
bb.0:
|
||||
%3:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.1:
|
||||
%5:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%6:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.2:
|
||||
%7:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%8:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.3:
|
||||
%9:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%10:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.4:
|
||||
%11:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%12:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.5:
|
||||
%13:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%14:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.6:
|
||||
%15:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%16:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.7:
|
||||
%17:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%18:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.8:
|
||||
%19:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%20:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.9:
|
||||
%21:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%22:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.10:
|
||||
%23:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%24:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.11:
|
||||
%25:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%26:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.12:
|
||||
%27:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%28:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.13:
|
||||
%29:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%30:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.14:
|
||||
%31:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%32:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.15:
|
||||
%33:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%34:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.16:
|
||||
%35:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%36:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.17:
|
||||
%37:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%38:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.18:
|
||||
%39:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%40:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.19:
|
||||
%41:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%42:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.20:
|
||||
%43:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%44:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.21:
|
||||
%45:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
|
||||
%46:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
|
||||
|
||||
bb.22:
|
||||
; There shouldn't be any exception for $zr, so the second VCMP should
|
||||
; be transformed into a VPNOT.
|
||||
%47:vccr = MVE_VCMPu8r %0:mqpr, $zr, 10, 0, $noreg
|
||||
%48:vccr = MVE_VCMPu8r %0:mqpr, $zr, 11, 0, $noreg
|
||||
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: vcmp_with_opposite_cond_and_swapped_operands
|
||||
alignment: 4
|
||||
body: |
|
||||
; CHECK-LABEL: name: vcmp_with_opposite_cond_and_swapped_operands
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
|
||||
; CHECK: bb.4:
|
||||
; CHECK: successors: %bb.5(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
|
||||
; CHECK: bb.5:
|
||||
; CHECK: successors: %bb.6(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
|
||||
; CHECK: bb.6:
|
||||
; CHECK: successors: %bb.7(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
|
||||
; CHECK: bb.7:
|
||||
; CHECK: successors: %bb.8(0x80000000)
|
||||
; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
|
||||
; CHECK: bb.8:
|
||||
; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
;
|
||||
; Tests that VCMPs with an opposite condition and swapped operands are
|
||||
; correctly converted into VPNOTs.
|
||||
;
|
||||
bb.0:
|
||||
%2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.1:
|
||||
%4:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%5:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.2:
|
||||
%6:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%7:vccr = MVE_VCMPi8 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.3:
|
||||
%8:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%9:vccr = MVE_VCMPs16 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.4:
|
||||
%10:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%11:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.5:
|
||||
%12:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%13:vccr = MVE_VCMPs8 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.6:
|
||||
%14:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%15:vccr = MVE_VCMPu16 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.7:
|
||||
%16:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%17:vccr = MVE_VCMPu32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.8:
|
||||
%18:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%19:vccr = MVE_VCMPu8 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: triple_vcmp
|
||||
alignment: 4
|
||||
body: |
|
||||
;
|
||||
; Tests that, when there are 2 "VPNOT-like VCMPs" in a row, only the first
|
||||
; becomes a VPNOT.
|
||||
;
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: triple_vcmp
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
|
||||
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 12, 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
%4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: killed_vccr_values
|
||||
alignment: 4
|
||||
body: |
|
||||
bb.0:
|
||||
;
|
||||
; Tests that, if the result of the VCMP is killed before the
|
||||
; second VCMP (that will be converted into a VPNOT) is found,
|
||||
; the kill flag is removed.
|
||||
;
|
||||
; CHECK-LABEL: name: killed_vccr_values
|
||||
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR %1:mqpr, %2:mqpr, 1, [[MVE_VCMPf16_]], undef [[MVE_VORR]]
|
||||
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:mqpr = MVE_VORR %0:mqpr, %1:mqpr, 1, killed %2:vccr, undef %3:mqpr
|
||||
%4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: predicated_vcmps
|
||||
alignment: 4
|
||||
body: |
|
||||
; CHECK-LABEL: name: predicated_vcmps
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPi16_1:%[0-9]+]]:vccr = MVE_VCMPi16 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi16_]]
|
||||
; CHECK: [[MVE_VCMPi16_2:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPi32_1:%[0-9]+]]:vccr = MVE_VCMPi32 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi32_]]
|
||||
; CHECK: [[MVE_VCMPi32_2:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_]]
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf16_]]
|
||||
; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf16_]]
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf32_]]
|
||||
; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf32_]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: successors: %bb.5(0x80000000)
|
||||
; CHECK: [[MVE_VCMPi16_3:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPi16_4:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi16_3]]
|
||||
; CHECK: [[MVE_VCMPi16_5:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_3]]
|
||||
; CHECK: bb.5:
|
||||
; CHECK: [[MVE_VCMPi32_3:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPi32_4:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi32_3]]
|
||||
; CHECK: [[MVE_VCMPi32_5:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_3]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
;
|
||||
; Tests that predicated VCMPs are not replaced.
|
||||
;
|
||||
bb.0:
|
||||
%2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 1, %2:vccr
|
||||
%4:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %2:vccr
|
||||
|
||||
bb.1:
|
||||
%5:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%6:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 1, %5:vccr
|
||||
%7:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %5:vccr
|
||||
|
||||
bb.2:
|
||||
%8:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%9:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 1, %8:vccr
|
||||
%10:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 1, %8:vccr
|
||||
|
||||
bb.3:
|
||||
%11:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%12:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 1, %11:vccr
|
||||
%13:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 1, %11:vccr
|
||||
|
||||
bb.4:
|
||||
%14:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%15:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 1, %14:vccr
|
||||
%16:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %14:vccr
|
||||
|
||||
bb.5:
|
||||
%17:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%18:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 1, %17:vccr
|
||||
%19:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %17:vccr
|
||||
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: flt_with_swapped_operands
|
||||
alignment: 4
|
||||
body: |
|
||||
; CHECK-LABEL: name: flt_with_swapped_operands
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 12, 0, $noreg
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 12, 0, $noreg
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf16_3:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
; CHECK: bb.3:
|
||||
; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPf32_3:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
;
|
||||
; Tests that float VCMPs with an opposite condition and swapped operands
|
||||
; are not transformed into VPNOTs.
|
||||
;
|
||||
bb.0:
|
||||
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.1:
|
||||
%4:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%5:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 12, 0, $noreg
|
||||
|
||||
bb.2:
|
||||
%6:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%7:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 11, 0, $noreg
|
||||
|
||||
bb.3:
|
||||
%8:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%9:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 11, 0, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: different_opcodes
|
||||
alignment: 4
|
||||
body: |
|
||||
;
|
||||
; Tests that a "VPNOT-like VCMP" with an opcode different from the previous VCMP
|
||||
; is not transformed into a VPNOT.
|
||||
;
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: different_opcodes
|
||||
; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 0, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 1, 1, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
%2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 0, 0, $noreg
|
||||
%3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 1, 1, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: incorrect_condcode
|
||||
alignment: 4
|
||||
body: |
|
||||
; CHECK-LABEL: name: incorrect_condcode
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 11, 0, $noreg
|
||||
; CHECK: bb.1:
|
||||
; CHECK: [[MVE_VCMPs32_2:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
|
||||
; CHECK: [[MVE_VCMPs32_3:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
;
|
||||
; Tests that a VCMP is not transformed into a VPNOT if its CondCode is not
|
||||
; the opposite CondCode.
|
||||
;
|
||||
bb.0:
|
||||
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 11, 0, $noreg
|
||||
bb.1:
|
||||
%4:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
%5:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
||||
---
|
||||
name: vpr_or_vccr_write_between_vcmps
|
||||
alignment: 4
|
||||
body: |
|
||||
;
|
||||
; Tests that a "VPNOT-like VCMP" will not be transformed into a VPNOT if
|
||||
; VCCR/VPR is written to in-between.
|
||||
;
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: vpr_or_vccr_write_between_vcmps
|
||||
; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
|
||||
; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT killed [[MVE_VCMPs32_]], 0, $noreg
|
||||
; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 10, 0, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
|
||||
%2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
|
||||
%3:vccr = MVE_VPNOT killed %2:vccr, 0, $noreg
|
||||
%4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 10, 0, $noreg
|
||||
tBX_RET 14, $noreg, implicit %0:mqpr
|
||||
...
|
Loading…
Reference in New Issue
Block a user