mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[ARM] Introduce t2WhileLoopStartTP
This adds t2WhileLoopStartTP, similar to the t2DoLoopStartTP added in D90591. It keeps a reference to both the tripcount register and the element count register, so that the ARMLowOverheadLoops pass in the backend can pick the correct one without having to search for it from the operand of a VCTP. Differential Revision: https://reviews.llvm.org/D103236
This commit is contained in:
parent
416150a164
commit
9fd9749580
@ -6122,8 +6122,9 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
|
||||
// Be conservative with ARMv8.1 MVE instructions.
|
||||
if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
|
||||
Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
|
||||
Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2LoopDec ||
|
||||
Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec)
|
||||
Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
|
||||
Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
|
||||
Opc == ARM::t2LoopEndDec)
|
||||
return outliner::InstrType::Illegal;
|
||||
|
||||
const MCInstrDesc &MCID = MI.getDesc();
|
||||
|
@ -367,7 +367,8 @@ public:
|
||||
bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override {
|
||||
return MI->getOpcode() == ARM::t2LoopEndDec ||
|
||||
MI->getOpcode() == ARM::t2DoLoopStartTP ||
|
||||
MI->getOpcode() == ARM::t2WhileLoopStartLR;
|
||||
MI->getOpcode() == ARM::t2WhileLoopStartLR ||
|
||||
MI->getOpcode() == ARM::t2WhileLoopStartTP;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -645,12 +646,6 @@ static inline bool isJumpTableBranchOpcode(int Opc) {
|
||||
Opc == ARM::t2BR_JT;
|
||||
}
|
||||
|
||||
static inline bool isLowOverheadTerminatorOpcode(int Opc) {
|
||||
return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
|
||||
Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2LoopEnd ||
|
||||
Opc == ARM::t2LoopEndDec;
|
||||
}
|
||||
|
||||
static inline
|
||||
bool isIndirectBranchOpcode(int Opc) {
|
||||
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "ARMBaseInstrInfo.h"
|
||||
#include "ARMBasicBlockInfo.h"
|
||||
#include "ARMSubtarget.h"
|
||||
#include "MVETailPredUtils.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
@ -61,13 +62,13 @@ INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
|
||||
|
||||
static MachineInstr *findWLSInBlock(MachineBasicBlock *MBB) {
|
||||
for (auto &Terminator : MBB->terminators()) {
|
||||
if (Terminator.getOpcode() == ARM::t2WhileLoopStartLR)
|
||||
if (isWhileLoopStart(Terminator))
|
||||
return &Terminator;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Find t2WhileLoopStartLR in the loop predecessor BB or otherwise in its only
|
||||
/// Find WhileLoopStart in the loop predecessor BB or otherwise in its only
|
||||
/// predecessor. If found, returns the WLS instruction, otherwise nullptr.
|
||||
static MachineInstr *findWLS(MachineLoop *ML) {
|
||||
MachineBasicBlock *Predecessor = ML->getLoopPredecessor();
|
||||
@ -93,7 +94,7 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
|
||||
return false;
|
||||
|
||||
MachineBasicBlock *Predecessor = WlsInstr->getParent();
|
||||
MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB();
|
||||
MachineBasicBlock *LoopExit = getWhileLoopStartTargetBB(*WlsInstr);
|
||||
|
||||
// We don't want to move Preheader to before the function's entry block.
|
||||
if (!LoopExit->getPrevNode())
|
||||
@ -118,9 +119,9 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
|
||||
++It) {
|
||||
MachineBasicBlock *MBB = &*It;
|
||||
for (auto &Terminator : MBB->terminators()) {
|
||||
if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR)
|
||||
if (!isWhileLoopStart(Terminator))
|
||||
continue;
|
||||
MachineBasicBlock *WLSTarget = Terminator.getOperand(2).getMBB();
|
||||
MachineBasicBlock *WLSTarget = getWhileLoopStartTargetBB(Terminator);
|
||||
// TODO: Analyse the blocks to make a decision if it would be worth
|
||||
// moving Preheader even if we'd introduce a backwards WLS
|
||||
if (WLSTarget == Predecessor) {
|
||||
|
@ -5479,8 +5479,8 @@ let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in {
|
||||
// t2DoLoopStart a pseudo for DLS hardware loops. Lowered into a DLS in
|
||||
// ARMLowOverheadLoops if possible, or reverted to a Mov if not.
|
||||
def t2DoLoopStart :
|
||||
t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br,
|
||||
[(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>;
|
||||
t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$tc), 4, IIC_Br,
|
||||
[(set GPRlr:$X, (int_start_loop_iterations rGPR:$tc))]>;
|
||||
|
||||
// A pseudo for a DLSTP, created in the MVETPAndVPTOptimizationPass from a
|
||||
// t2DoLoopStart if the loop is tail predicated. Holds both the element
|
||||
@ -5488,7 +5488,7 @@ def t2DoLoopStart :
|
||||
// ARMLowOverheadLoops when it is converted to a DLSTP or DLS as required.
|
||||
let isTerminator = 1, hasSideEffects = 1 in
|
||||
def t2DoLoopStartTP :
|
||||
t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>;
|
||||
t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$tc, rGPR:$elts), 4, IIC_Br, []>;
|
||||
|
||||
// Setup for a t2WhileLoopStart. A pair of t2WhileLoopSetup and t2WhileLoopStart
|
||||
// will be created post-ISel from a llvm.test.start.loop.iterations. This
|
||||
@ -5496,7 +5496,7 @@ def t2DoLoopStartTP :
|
||||
// valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
|
||||
// into a t2WhileLoopStartLR (or expanded).
|
||||
def t2WhileLoopSetup :
|
||||
t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$elts), 4, IIC_Br, []>;
|
||||
t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>;
|
||||
|
||||
// A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and
|
||||
// t2LoopEnd together represent a LE instruction. Ideally these are converted
|
||||
@ -5511,7 +5511,7 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
|
||||
// into a t2WhileLoopStartLR that does both the LR setup and branch.
|
||||
def t2WhileLoopStart :
|
||||
t2PseudoInst<(outs),
|
||||
(ins GPRlr:$elts, brtarget:$target),
|
||||
(ins GPRlr:$tc, brtarget:$target),
|
||||
4, IIC_Br, []>,
|
||||
Sched<[WriteBr]>;
|
||||
|
||||
@ -5521,13 +5521,21 @@ def t2WhileLoopStart :
|
||||
// converted into t2CMP and t2Bcc.
|
||||
def t2WhileLoopStartLR :
|
||||
t2PseudoInst<(outs GPRlr:$lr),
|
||||
(ins rGPR:$elts, brtarget:$target),
|
||||
(ins rGPR:$tc, brtarget:$target),
|
||||
8, IIC_Br, []>,
|
||||
Sched<[WriteBr]>;
|
||||
|
||||
// Similar to a t2DoLoopStartTP, a t2WhileLoopStartTP is a pseudo for a WLSTP
|
||||
// holding both the element count and the tripcount of the loop.
|
||||
def t2WhileLoopStartTP :
|
||||
t2PseudoInst<(outs GPRlr:$lr),
|
||||
(ins rGPR:$tc, rGPR:$elts, brtarget:$target),
|
||||
8, IIC_Br, []>,
|
||||
Sched<[WriteBr]>;
|
||||
|
||||
// t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair.
|
||||
def t2LoopEnd :
|
||||
t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
|
||||
t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
|
||||
8, IIC_Br, []>, Sched<[WriteBr]>;
|
||||
|
||||
// The combination of a t2LoopDec and t2LoopEnd, performing both the LR
|
||||
@ -5535,7 +5543,7 @@ def t2LoopEnd :
|
||||
// LETP in ARMLowOverheadLoops as appropriate, or converted to t2CMP/t2Bcc
|
||||
// if the branches are out of range.
|
||||
def t2LoopEndDec :
|
||||
t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target),
|
||||
t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$tc, brtarget:$target),
|
||||
8, IIC_Br, []>, Sched<[WriteBr]>;
|
||||
|
||||
} // end isBranch, isTerminator, hasSideEffects
|
||||
|
@ -101,10 +101,6 @@ static bool shouldInspect(MachineInstr &MI) {
|
||||
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
|
||||
}
|
||||
|
||||
static bool isDo(MachineInstr *MI) {
|
||||
return MI->getOpcode() != ARM::t2WhileLoopStartLR;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
using InstSet = SmallPtrSetImpl<MachineInstr *>;
|
||||
@ -446,7 +442,7 @@ namespace {
|
||||
}
|
||||
|
||||
unsigned getStartOpcode() const {
|
||||
bool IsDo = isDo(Start);
|
||||
bool IsDo = isDoLoopStart(*Start);
|
||||
if (!IsTailPredicationLegal())
|
||||
return IsDo ? ARM::t2DLS : ARM::t2WLS;
|
||||
|
||||
@ -635,7 +631,8 @@ bool LowOverheadLoop::ValidateTailPredicate() {
|
||||
// elements is provided to the vctp instruction, so we need to check that
|
||||
// we can use this register at InsertPt.
|
||||
MachineInstr *VCTP = VCTPs.back();
|
||||
if (Start->getOpcode() == ARM::t2DoLoopStartTP) {
|
||||
if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
|
||||
Start->getOpcode() == ARM::t2WhileLoopStartTP) {
|
||||
TPNumElements = Start->getOperand(2);
|
||||
StartInsertPt = Start;
|
||||
StartInsertBB = Start->getParent();
|
||||
@ -778,10 +775,12 @@ bool LowOverheadLoop::ValidateTailPredicate() {
|
||||
}
|
||||
}
|
||||
|
||||
// If we converted the LoopStart to a t2DoLoopStartTP, we can also remove any
|
||||
// extra instructions in the preheader, which often includes a now unused MOV.
|
||||
if (Start->getOpcode() == ARM::t2DoLoopStartTP && Preheader &&
|
||||
!Preheader->empty() &&
|
||||
// If we converted the LoopStart to a t2DoLoopStartTP/t2WhileLoopStartTP, we
|
||||
// can also remove any extra instructions in the preheader, which often
|
||||
// includes a now unused MOV.
|
||||
if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
|
||||
Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
|
||||
Preheader && !Preheader->empty() &&
|
||||
!RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) {
|
||||
if (auto *Def = RDA.getUniqueReachingMIDef(
|
||||
&Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) {
|
||||
@ -1045,12 +1044,13 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Start->getOpcode() == ARM::t2WhileLoopStartLR &&
|
||||
(BBUtils->getOffsetOf(Start) >
|
||||
BBUtils->getOffsetOf(Start->getOperand(2).getMBB()) ||
|
||||
!BBUtils->isBBInRange(Start, Start->getOperand(2).getMBB(), 4094))) {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
|
||||
return false;
|
||||
if (isWhileLoopStart(*Start)) {
|
||||
MachineBasicBlock *TargetBB = getWhileLoopStartTargetBB(*Start);
|
||||
if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
|
||||
!BBUtils->isBBInRange(Start, TargetBB, 4094)) {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
@ -1289,7 +1289,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
||||
// another low register.
|
||||
void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
|
||||
MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
|
||||
MachineBasicBlock *DestBB = getWhileLoopStartTargetBB(*MI);
|
||||
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
|
||||
ARM::tBcc : ARM::t2Bcc;
|
||||
|
||||
@ -1426,8 +1426,8 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
|
||||
|
||||
MIB.addDef(ARM::LR);
|
||||
MIB.add(Count);
|
||||
if (!isDo(Start))
|
||||
MIB.add(Start->getOperand(2));
|
||||
if (isWhileLoopStart(*Start))
|
||||
MIB.addMBB(getWhileLoopStartTargetBB(*Start));
|
||||
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
|
||||
NewStart = &*MIB;
|
||||
@ -1612,7 +1612,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
|
||||
};
|
||||
|
||||
if (LoLoop.Revert) {
|
||||
if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStartLR)
|
||||
if (isWhileLoopStart(*LoLoop.Start))
|
||||
RevertWhile(LoLoop.Start);
|
||||
else
|
||||
RevertDo(LoLoop.Start);
|
||||
@ -1683,7 +1683,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
|
||||
Changed = true;
|
||||
|
||||
for (auto *Start : Starts) {
|
||||
if (Start->getOpcode() == ARM::t2WhileLoopStartLR)
|
||||
if (isWhileLoopStart(*Start))
|
||||
RevertWhile(Start);
|
||||
else
|
||||
RevertDo(Start);
|
||||
|
@ -429,7 +429,8 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
|
||||
MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
|
||||
if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
|
||||
return false;
|
||||
if (LoopDec != LoopEnd || LoopStart->getOpcode() != ARM::t2DoLoopStart)
|
||||
if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
|
||||
LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
|
||||
return false;
|
||||
|
||||
SmallVector<MachineInstr *, 4> VCTPs;
|
||||
@ -494,12 +495,16 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
|
||||
TII->get(ARM::t2DoLoopStartTP))
|
||||
.add(LoopStart->getOperand(0))
|
||||
.add(LoopStart->getOperand(1))
|
||||
.addReg(CountReg);
|
||||
(void)MI;
|
||||
unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
|
||||
? ARM::t2DoLoopStartTP
|
||||
: ARM::t2WhileLoopStartTP;
|
||||
MachineInstrBuilder MI =
|
||||
BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
|
||||
.add(LoopStart->getOperand(0))
|
||||
.add(LoopStart->getOperand(1))
|
||||
.addReg(CountReg);
|
||||
if (NewOpc == ARM::t2WhileLoopStartTP)
|
||||
MI.add(LoopStart->getOperand(2));
|
||||
LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
|
||||
<< *MI.getInstr());
|
||||
MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
|
||||
|
@ -68,11 +68,26 @@ static inline bool isVCTP(const MachineInstr *MI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool isLoopStart(MachineInstr &MI) {
|
||||
static inline bool isDoLoopStart(const MachineInstr &MI) {
|
||||
return MI.getOpcode() == ARM::t2DoLoopStart ||
|
||||
MI.getOpcode() == ARM::t2DoLoopStartTP ||
|
||||
MI.getOpcode() == ARM::t2WhileLoopStart ||
|
||||
MI.getOpcode() == ARM::t2WhileLoopStartLR;
|
||||
MI.getOpcode() == ARM::t2DoLoopStartTP;
|
||||
}
|
||||
|
||||
static inline bool isWhileLoopStart(const MachineInstr &MI) {
|
||||
return MI.getOpcode() == ARM::t2WhileLoopStart ||
|
||||
MI.getOpcode() == ARM::t2WhileLoopStartLR ||
|
||||
MI.getOpcode() == ARM::t2WhileLoopStartTP;
|
||||
}
|
||||
|
||||
static inline bool isLoopStart(const MachineInstr &MI) {
|
||||
return isDoLoopStart(MI) || isWhileLoopStart(MI);
|
||||
}
|
||||
|
||||
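// Return the TargetBB stored in a t2WhileLoopStartLR (operand 2) or a t2WhileLoopStartTP (operand 3). A plain t2WhileLoopStart keeps its target at operand 1 and is not handled here.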
|
||||
inline MachineBasicBlock *getWhileLoopStartTargetBB(const MachineInstr &MI) {
|
||||
assert(isWhileLoopStart(MI) && "Expected WhileLoopStart!");
|
||||
unsigned Op = MI.getOpcode() == ARM::t2WhileLoopStartTP ? 3 : 2;
|
||||
return MI.getOperand(Op).getMBB();
|
||||
}
|
||||
|
||||
// WhileLoopStart holds the exit block, so produce a subs Op0, Op1, 0 and then a
|
||||
@ -84,8 +99,9 @@ inline void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII,
|
||||
unsigned BrOpc = ARM::t2Bcc,
|
||||
bool UseCmp = false) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
assert(MI->getOpcode() == ARM::t2WhileLoopStartLR &&
|
||||
"Only expected a t2WhileLoopStartLR in RevertWhileLoopStartLR!");
|
||||
assert((MI->getOpcode() == ARM::t2WhileLoopStartLR ||
|
||||
MI->getOpcode() == ARM::t2WhileLoopStartTP) &&
|
||||
"Only expected a t2WhileLoopStartLR/TP in RevertWhileLoopStartLR!");
|
||||
|
||||
// Subs/Cmp
|
||||
if (UseCmp) {
|
||||
@ -109,8 +125,8 @@ inline void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII,
|
||||
// Branch
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
|
||||
MIB.add(MI->getOperand(2)); // branch target
|
||||
MIB.addImm(ARMCC::EQ); // condition code
|
||||
MIB.addMBB(getWhileLoopStartTargetBB(*MI)); // branch target
|
||||
MIB.addImm(ARMCC::EQ); // condition code
|
||||
MIB.addReg(ARM::CPSR);
|
||||
|
||||
MI->eraseFromParent();
|
||||
|
@ -17,8 +17,7 @@ define void @test_memcpy(i32* nocapture %x, i32* nocapture readonly %y, i32 %n,
|
||||
; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
|
||||
; CHECK-NEXT: adds r4, r1, r7
|
||||
; CHECK-NEXT: adds r5, r0, r7
|
||||
; CHECK-NEXT: mov r6, r3
|
||||
; CHECK-NEXT: wlstp.8 lr, r6, .LBB0_3
|
||||
; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_3
|
||||
; CHECK-NEXT: b .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_3: @ %for.body
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
|
||||
@ -71,8 +70,7 @@ define void @test_memset(i32* nocapture %x, i32 %n, i32 %m) {
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB1_4 Depth 2
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r3, r2
|
||||
; CHECK-NEXT: wlstp.8 lr, r3, .LBB1_3
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB1_3
|
||||
; CHECK-NEXT: b .LBB1_4
|
||||
; CHECK-NEXT: .LBB1_3: @ %for.body
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_2 Depth=1
|
||||
@ -285,8 +283,7 @@ define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) {
|
||||
; CHECK-NEXT: @ %bb.1: @ %prehead
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: mov r12, r0
|
||||
; CHECK-NEXT: mov r3, r2
|
||||
; CHECK-NEXT: wlstp.8 lr, r3, .LBB6_3
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB6_3
|
||||
; CHECK-NEXT: .LBB6_2: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r12], #16
|
||||
; CHECK-NEXT: letp lr, .LBB6_2
|
||||
|
@ -63,11 +63,11 @@ body: |
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2WhileLoopStartLR:%[0-9]+]]:gprlr = t2WhileLoopStartLR killed [[t2LSRri]], %bb.3, implicit-def $cpsr
|
||||
; CHECK: [[t2WhileLoopStartTP:%[0-9]+]]:gprlr = t2WhileLoopStartTP killed [[t2LSRri]], [[COPY]], %bb.3, implicit-def $cpsr
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %11, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopStartLR]], %bb.1, %13, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopStartTP]], %bb.1, %13, %bb.2
|
||||
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %15, %bb.2
|
||||
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
|
||||
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
|
@ -634,8 +634,7 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
|
||||
; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r3, r0, r5, lsl #1
|
||||
; CHECK-NEXT: mov r5, r6
|
||||
; CHECK-NEXT: wlstp.8 lr, r5, .LBB10_4
|
||||
; CHECK-NEXT: wlstp.8 lr, r6, .LBB10_4
|
||||
; CHECK-NEXT: b .LBB10_15
|
||||
; CHECK-NEXT: .LBB10_4: @ %for.cond1.for.cond.cleanup3_crit_edge.us
|
||||
; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1
|
||||
|
@ -235,8 +235,7 @@ define void @test11(i8* nocapture %x, i8* nocapture %y, i32 %n) {
|
||||
; CHECK-NEXT: .LBB10_1: @ %prehead
|
||||
; CHECK-NEXT: mov r12, r1
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r3, r2
|
||||
; CHECK-NEXT: wlstp.8 lr, r3, .LBB10_3
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3
|
||||
; CHECK-NEXT: .LBB10_2: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r12], #16
|
||||
; CHECK-NEXT: vstrb.8 q0, [r4], #16
|
||||
@ -318,8 +317,7 @@ define void @twoloops(i32* %X, i32 %n, i32 %m) {
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: mov r3, r0
|
||||
; CHECK-NEXT: mov r1, r2
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB13_2
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB13_2
|
||||
; CHECK-NEXT: .LBB13_1: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r3], #16
|
||||
; CHECK-NEXT: letp lr, .LBB13_1
|
||||
@ -489,8 +487,7 @@ define void @multilooped_exit(i32 %b) {
|
||||
; CHECK-NEXT: movt r3, :upper16:arr_56
|
||||
; CHECK-NEXT: lsr.w r12, r1, #4
|
||||
; CHECK-NEXT: mov r2, r3
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB18_5
|
||||
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_5
|
||||
; CHECK-NEXT: .LBB18_4: @ Parent Loop BB18_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #16
|
||||
@ -498,8 +495,7 @@ define void @multilooped_exit(i32 %b) {
|
||||
; CHECK-NEXT: .LBB18_5: @ %loop
|
||||
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
|
||||
; CHECK-NEXT: mov r2, r3
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB18_7
|
||||
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_7
|
||||
; CHECK-NEXT: .LBB18_6: @ Parent Loop BB18_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #16
|
||||
@ -507,8 +503,7 @@ define void @multilooped_exit(i32 %b) {
|
||||
; CHECK-NEXT: .LBB18_7: @ %loop
|
||||
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
|
||||
; CHECK-NEXT: mov r2, r3
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB18_9
|
||||
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_9
|
||||
; CHECK-NEXT: .LBB18_8: @ Parent Loop BB18_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #16
|
||||
@ -567,12 +562,10 @@ define i32 @reverted(i1 zeroext %b) {
|
||||
; CHECK-NEXT: movw r0, :lower16:arr_22
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: movt r0, :upper16:arr_22
|
||||
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r1, r2, #15
|
||||
; CHECK-NEXT: lsrs r3, r1, #4
|
||||
; CHECK-NEXT: mov r1, r2
|
||||
; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_2
|
||||
; CHECK-NEXT: strd r3, r2, [sp] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_2
|
||||
; CHECK-NEXT: .LBB19_1: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB19_1
|
||||
@ -621,11 +614,12 @@ define i32 @reverted(i1 zeroext %b) {
|
||||
; CHECK-NEXT: le lr, .LBB19_3
|
||||
; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup6
|
||||
; CHECK-NEXT: movw r0, :lower16:arr_22
|
||||
; CHECK-NEXT: ldrd r2, r1, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: movt r0, :upper16:arr_22
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r0, r0, #1824
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_6
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_6
|
||||
; CHECK-NEXT: .LBB19_5: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q1, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB19_5
|
||||
@ -675,11 +669,12 @@ define i32 @reverted(i1 zeroext %b) {
|
||||
; CHECK-NEXT: le lr, .LBB19_7
|
||||
; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup6.1
|
||||
; CHECK-NEXT: movw r0, :lower16:arr_22
|
||||
; CHECK-NEXT: ldrd r2, r1, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: movt r0, :upper16:arr_22
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r0, r0, #3648
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_10
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_10
|
||||
; CHECK-NEXT: .LBB19_9: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q1, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB19_9
|
||||
@ -731,19 +726,14 @@ define i32 @reverted(i1 zeroext %b) {
|
||||
; CHECK-NEXT: le lr, .LBB19_11
|
||||
; CHECK-NEXT: @ %bb.12: @ %for.cond.cleanup6.2
|
||||
; CHECK-NEXT: movw r0, :lower16:arr_22
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r2, r1, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: movt r0, :upper16:arr_22
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: add.w r0, r0, #5472
|
||||
; CHECK-NEXT: wls lr, r1, .LBB19_14
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_14
|
||||
; CHECK-NEXT: .LBB19_13: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: vctp.8 r1
|
||||
; CHECK-NEXT: subs r1, #16
|
||||
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vstrbt.8 q1, [r0], #16
|
||||
; CHECK-NEXT: le lr, .LBB19_13
|
||||
; CHECK-NEXT: vstrb.8 q1, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB19_13
|
||||
; CHECK-NEXT: .LBB19_14: @ %for.cond.cleanup6.2
|
||||
; CHECK-NEXT: movw r2, :lower16:arr_21
|
||||
; CHECK-NEXT: movw r1, #5508
|
||||
|
Loading…
Reference in New Issue
Block a user