1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[ARM] Alter t2DoLoopStart to define lr

This changes the definition of t2DoLoopStart from
t2DoLoopStart rGPR
to
GPRlr = t2DoLoopStart rGPR

This will hopefully mean that low overhead loops are more tied together,
and we can more reliably generate loops without reverting or being at
the whims of the register allocator.

This is a fairly simple change in itself, but leads to a number of other
required alterations.

 - The hardware loop pass, if UsePhi is set, now generates loops of the
   form:
       %start = llvm.start.loop.iterations(%N)
     loop:
       %p = phi [%start], [%dec]
       %dec = llvm.loop.decrement.reg(%p, 1)
       %c = icmp ne %dec, 0
       br %c, loop, exit
 - For this a new llvm.start.loop.iterations intrinsic was added, identical
   to llvm.set.loop.iterations but produces a value as seen above, gluing
   the loop together more through def-use chains.
 - This new intrinsic conceptually produces the same output as input,
   which is taught to SCEV so that the checks in MVETailPredication are not
   affected.
 - Some minor changes are needed to the ARMLowOverheadLoop pass, but it has
   been left mostly as before. We should now more reliably be able to tell
   that the t2DoLoopStart is correct without having to prove it, but
   t2WhileLoopStart and tail-predicated loops will remain the same.
 - And all the tests have been updated. There are a lot of them!

This patch on its own might cause more trouble than it helps, with more
tail-predicated loops being reverted, but some additional patches can
hopefully improve upon that to get to something that is better overall.

Differential Revision: https://reviews.llvm.org/D89881
This commit is contained in:
David Green 2020-11-10 15:57:58 +00:00
parent 13bea66c92
commit 0773b05cfa
145 changed files with 2166 additions and 2227 deletions

View File

@ -15502,6 +15502,45 @@ on their operand. It's a hint to the backend that can use this to set up the
hardware-loop count with a target specific instruction, usually a move of this
value to a special register or a hardware-loop instruction.
'``llvm.start.loop.iterations.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i64 @llvm.start.loop.iterations.i64(i64)
Overview:
"""""""""
The '``llvm.start.loop.iterations.*``' intrinsics are similar to the
'``llvm.set.loop.iterations.*``' intrinsics, used to specify the
hardware-loop trip count but also produce a value identical to the input
that can be used as the input to the loop. They are placed in the loop
preheader basic block and the output is expected to be the input to the
phi for the induction variable of the loop, decremented by the
'``llvm.loop.decrement.reg.*``'.
Arguments:
""""""""""
The integer operand is the loop trip count of the hardware-loop, and thus
not e.g. the loop back-edge taken count.
Semantics:
""""""""""
The '``llvm.start.loop.iterations.*``' intrinsics do not perform any arithmetic
on their operand. It's a hint to the backend that can use this to set up the
hardware-loop count with a target specific instruction, usually a move of this
value to a special register or a hardware-loop instruction.
'``llvm.test.set.loop.iterations.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -1576,6 +1576,11 @@ def int_matrix_column_major_store
def int_set_loop_iterations :
DefaultAttrsIntrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>;
// Same as the above, but produces a value (the same as the input operand) to
// be fed into the loop.
def int_start_loop_iterations :
DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoDuplicate]>;
// Specify that the value given is the number of iterations that the next loop
// will execute. Also test that the given count is not zero, allowing it to
// control entry to a 'while' loop.

View File

@ -6672,6 +6672,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y));
return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
}
case Intrinsic::start_loop_iterations:
// A start_loop_iterations is just equivalent to the first operand for
// SCEV purposes.
return getSCEV(II->getArgOperand(0));
default:
break;
}

View File

@ -165,7 +165,7 @@ namespace {
Value *InitLoopCount();
// Insert the set_loop_iteration intrinsic.
void InsertIterationSetup(Value *LoopCountInit);
Value *InsertIterationSetup(Value *LoopCountInit);
// Insert the loop_decrement intrinsic.
void InsertLoopDec();
@ -325,11 +325,11 @@ void HardwareLoop::Create() {
return;
}
InsertIterationSetup(LoopCountInit);
Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
UpdateBranch(LoopDec);
} else
@ -437,11 +437,13 @@ Value *HardwareLoop::InitLoopCount() {
return Count;
}
void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
Intrinsic::ID ID = UseLoopGuard ?
Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations
: (UsePhi ? Intrinsic::start_loop_iterations
: Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
@ -457,6 +459,7 @@ void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
}
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
<< *SetCount << "\n");
return UseLoopGuard ? LoopCountInit : SetCount;
}
void HardwareLoop::InsertLoopDec() {

View File

@ -5420,9 +5420,11 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
let isTerminator = 1;
}
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in {
def t2DoLoopStart :
t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
[(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br,
[(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>;
let hasSideEffects = 0 in
def t2LoopDec :
@ -5444,6 +5446,8 @@ def t2LoopEnd :
} // end isBranch, isTerminator, hasSideEffects
}
} // end isNotDuplicable
class CS<string iname, bits<4> opcode, list<dag> pattern=[]>

View File

@ -429,7 +429,10 @@ namespace {
// Return the operand for the loop start instruction. This will be the loop
// iteration count, or the number of elements if we're tail predicating.
MachineOperand &getLoopStartOperand() {
return IsTailPredicationLegal() ? TPNumElements : Start->getOperand(0);
if (IsTailPredicationLegal())
return TPNumElements;
return Start->getOpcode() == ARM::t2DoLoopStart ? Start->getOperand(1)
: Start->getOperand(0);
}
unsigned getStartOpcode() const {
@ -495,6 +498,7 @@ namespace {
bool RevertNonLoops();
void RevertWhile(MachineInstr *MI) const;
void RevertDo(MachineInstr *MI) const;
bool RevertLoopDec(MachineInstr *MI) const;
@ -618,8 +622,12 @@ bool LowOverheadLoop::ValidateTailPredicate() {
// count instead of iteration count, won't affect any other instructions
// than the LoopStart and LoopDec.
// TODO: We should try to insert the [W|D]LSTP after any of the other uses.
if (StartInsertPt == Start && Start->getOperand(0).getReg() == ARM::LR) {
if (auto *IterCount = RDA.getMIOperand(Start, 0)) {
Register StartReg = Start->getOpcode() == ARM::t2DoLoopStart
? Start->getOperand(1).getReg()
: Start->getOperand(0).getReg();
if (StartInsertPt == Start && StartReg == ARM::LR) {
if (auto *IterCount = RDA.getMIOperand(
Start, Start->getOpcode() == ARM::t2DoLoopStart ? 1 : 0)) {
SmallPtrSet<MachineInstr *, 2> Uses;
RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
for (auto *Use : Uses) {
@ -1053,53 +1061,15 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
MachineBasicBlock *&InsertBB,
ReachingDefAnalysis &RDA,
InstSet &ToRemove) {
// We can define LR because LR already contains the same value.
if (Start->getOperand(0).getReg() == ARM::LR) {
// For a t2DoLoopStart it is always valid to use the start insertion point.
// For WLS we can define LR if LR already contains the same value.
if (Start->getOpcode() == ARM::t2DoLoopStart ||
Start->getOperand(0).getReg() == ARM::LR) {
InsertPt = MachineBasicBlock::iterator(Start);
InsertBB = Start->getParent();
return true;
}
Register CountReg = Start->getOperand(0).getReg();
auto IsMoveLR = [&CountReg](MachineInstr *MI) {
return MI->getOpcode() == ARM::tMOVr &&
MI->getOperand(0).getReg() == ARM::LR &&
MI->getOperand(1).getReg() == CountReg &&
MI->getOperand(2).getImm() == ARMCC::AL;
};
// Find an insertion point:
// - Is there a (mov lr, Count) before Start? If so, and nothing else
// writes to Count before Start, we can insert at start.
if (auto *LRDef =
RDA.getUniqueReachingMIDef(Start, MCRegister::from(ARM::LR))) {
if (IsMoveLR(LRDef) &&
RDA.hasSameReachingDef(Start, LRDef, CountReg.asMCReg())) {
SmallPtrSet<MachineInstr *, 2> Ignore = { Dec };
if (!TryRemove(LRDef, RDA, ToRemove, Ignore))
return false;
InsertPt = MachineBasicBlock::iterator(Start);
InsertBB = Start->getParent();
return true;
}
}
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes
// to Count after Start, we can insert at that mov (which will now be
// dead).
MachineBasicBlock *MBB = Start->getParent();
if (auto *LRDef =
RDA.getLocalLiveOutMIDef(MBB, MCRegister::from(ARM::LR))) {
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) {
SmallPtrSet<MachineInstr *, 2> Ignore = { Start, Dec };
if (!TryRemove(LRDef, RDA, ToRemove, Ignore))
return false;
InsertPt = MachineBasicBlock::iterator(LRDef);
InsertBB = LRDef->getParent();
return true;
}
}
// We've found no suitable LR def and Start doesn't use LR directly. Can we
// just define LR anyway?
if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR)))
@ -1364,6 +1334,16 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
MI->eraseFromParent();
}
// Revert a loop-start instruction into a plain register move. The callers
// visible here use this for loop starts that are not t2WhileLoopStart. A tMOVr
// is built at MI's position from MI's operand 0 and operand 1 (in that order),
// with an always-execute (ARMCC::AL) predicate, and MI is then erased.
void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
.add(MI->getOperand(0))
.add(MI->getOperand(1))
.add(predOps(ARMCC::AL));
// The move above takes MI's place, so remove MI from its block.
MI->eraseFromParent();
}
bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
@ -1432,7 +1412,7 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
//
// $lr = big-itercount-expression
// ..
// t2DoLoopStart renamable $lr
// $lr = t2DoLoopStart renamable $lr
// vector.body:
// ..
// $vpr = MVE_VCTP32 renamable $r3
@ -1455,7 +1435,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 0);
MachineInstr *Def = RDA->getMIOperand(
LoLoop.Start, LoLoop.Start->getOpcode() == ARM::t2DoLoopStart ? 1 : 0);
if (!Def) {
LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
return;
@ -1634,7 +1615,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(LoLoop.Start);
else
LoLoop.Start->eraseFromParent();
RevertDo(LoLoop.Start);
bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec);
RevertLoopEnd(LoLoop.End, FlagsAlreadySet);
} else {
@ -1699,7 +1680,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
if (Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(Start);
else
Start->eraseFromParent();
RevertDo(Start);
}
for (auto *Dec : Decs)
RevertLoopDec(Dec);

View File

@ -1679,7 +1679,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
switch (Call->getIntrinsicID()) {
default:
break;
case Intrinsic::set_loop_iterations:
case Intrinsic::start_loop_iterations:
case Intrinsic::test_set_loop_iterations:
case Intrinsic::loop_decrement:
case Intrinsic::loop_decrement_reg:

View File

@ -188,7 +188,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
continue;
Intrinsic::ID ID = Call->getIntrinsicID();
if (ID == Intrinsic::set_loop_iterations ||
if (ID == Intrinsic::start_loop_iterations ||
ID == Intrinsic::test_set_loop_iterations)
return cast<IntrinsicInst>(&I);
}

View File

@ -152,7 +152,7 @@ body: |
$q5 = MVE_VDUP32 $r3, 0, $noreg, $q5
$q4 = MVE_VDUP32 $r4, 0, $noreg, $q4
$q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0
t2DoLoopStart $r4
$lr = t2DoLoopStart $r4
$r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg
tBL 14, $noreg, @z
bb.1:
@ -160,7 +160,7 @@ body: |
$q5 = MVE_VDUP32 $r3, 0, $noreg, $q5
$q4 = MVE_VDUP32 $r4, 0, $noreg, $q4
$q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0
t2DoLoopStart $r4
$lr = t2DoLoopStart $r4
$r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg
tBL 14, $noreg, @z
bb.2:

View File

@ -21,7 +21,7 @@
; CHECK-END: b .LBB0_2
define void @check_loop_dec_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -49,7 +49,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -64,7 +64,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_ugt_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -92,7 +92,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -107,7 +107,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_ult_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -135,7 +135,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -150,7 +150,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_ult_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -179,7 +179,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -194,7 +194,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_sgt_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -222,7 +222,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -237,7 +237,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_sge_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -265,7 +265,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -280,7 +280,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_sge_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -309,7 +309,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -324,7 +324,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_uge_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -352,7 +352,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -367,7 +367,7 @@ for.cond.cleanup:
; CHECK-MID: tB %bb.2
define void @check_loop_dec_uge_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body.preheader
for.body.preheader:
@ -396,7 +396,7 @@ for.header:
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %for.body.preheader ], [ %count.next, %for.body ]
br label %for.body
for.cond.cleanup:
@ -507,6 +507,6 @@ while.end:
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i1 @llvm.test.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

View File

@ -17,17 +17,17 @@ define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* no
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: br i1 [[TOBOOL]], label [[VECTOR_BODY75_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.body75.preheader:
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP2]])
; CHECK-NEXT: [[START1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]])
; CHECK-NEXT: br label [[VECTOR_BODY75:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT71:%.*]] = insertelement <4 x i32> undef, i32 [[X]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT72:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT71]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]])
; CHECK-NEXT: [[START2:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP3]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i32* [ [[SCEVGEP10:%.*]], [[VECTOR_BODY]] ], [ [[D:%.*]], [[VECTOR_PH]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[START2]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[N]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV911:%.*]] = bitcast i32* [[LSR_IV9]] to <4 x i32>*
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
@ -48,7 +48,7 @@ define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* no
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32* [ [[S2:%.*]], [[VECTOR_BODY75_PREHEADER]] ], [ [[SCEVGEP4:%.*]], [[VECTOR_BODY75]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[D]], [[VECTOR_BODY75_PREHEADER]] ], [ [[SCEVGEP:%.*]], [[VECTOR_BODY75]] ]
; CHECK-NEXT: [[INDEX80:%.*]] = phi i32 [ [[INDEX_NEXT81:%.*]], [[VECTOR_BODY75]] ], [ 0, [[VECTOR_BODY75_PREHEADER]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP2]], [[VECTOR_BODY75_PREHEADER]] ], [ [[TMP17:%.*]], [[VECTOR_BODY75]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[START1]], [[VECTOR_BODY75_PREHEADER]] ], [ [[TMP17:%.*]], [[VECTOR_BODY75]] ]
; CHECK-NEXT: [[LSR_IV68:%.*]] = bitcast i32* [[LSR_IV6]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV35:%.*]] = bitcast i32* [[LSR_IV3]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV2:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>*
@ -88,19 +88,19 @@ for.body.lr.ph: ; preds = %entry
br i1 %tobool, label %vector.body75.preheader, label %vector.ph
vector.body75.preheader: ; preds = %for.body.lr.ph
call void @llvm.set.loop.iterations.i32(i32 %2)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %2)
br label %vector.body75
vector.ph: ; preds = %for.body.lr.ph
%broadcast.splatinsert71 = insertelement <4 x i32> undef, i32 %x, i32 0
%broadcast.splat72 = shufflevector <4 x i32> %broadcast.splatinsert71, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %3)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %3)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv9 = phi i32* [ %scevgep10, %vector.body ], [ %d, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%4 = phi i32 [ %3, %vector.ph ], [ %8, %vector.body ]
%4 = phi i32 [ %start2, %vector.ph ], [ %8, %vector.body ]
%lsr.iv911 = bitcast i32* %lsr.iv9 to <4 x i32>*
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@ -120,7 +120,7 @@ vector.body75: ; preds = %vector.body75, %vec
%lsr.iv3 = phi i32* [ %s2, %vector.body75.preheader ], [ %scevgep4, %vector.body75 ]
%lsr.iv = phi i32* [ %d, %vector.body75.preheader ], [ %scevgep, %vector.body75 ]
%index80 = phi i32 [ %index.next81, %vector.body75 ], [ 0, %vector.body75.preheader ]
%10 = phi i32 [ %2, %vector.body75.preheader ], [ %15, %vector.body75 ]
%10 = phi i32 [ %start1, %vector.body75.preheader ], [ %15, %vector.body75 ]
%lsr.iv68 = bitcast i32* %lsr.iv6 to <4 x i32>*
%lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>*
%lsr.iv2 = bitcast i32* %lsr.iv to <4 x i32>*
@ -148,7 +148,7 @@ for.cond.cleanup: ; preds = %vector.body, %vecto
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)

View File

@ -79,7 +79,7 @@ body: |
$r4 = t2MOVTi16 killed $r4, target-flags(arm-hi16) @arm_cmplx_conj_f32_mve.cmplx_conj_sign, 14 /* CC::al */, $noreg
renamable $q0 = nnan ninf nsz MVE_VLDRWU32 killed renamable $r4, 0, 0, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1 (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -9,13 +9,13 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
%0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
@ -30,7 +30,7 @@
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
@ -112,7 +112,7 @@ body: |
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg

View File

@ -15,9 +15,9 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i
; CHECK-NEXT: bic r12, r12, #3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: sub.w r12, r12, #4
; CHECK-NEXT: add.w lr, lr, r12, lsr #2
; CHECK-NEXT: add.w r12, lr, r12, lsr #2
; CHECK-NEXT: dls lr, r12
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: and r4, r12, #15
@ -107,9 +107,9 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a,
; CHECK-NEXT: bic r4, r4, #3
; CHECK-NEXT: sub.w lr, r4, #4
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
; CHECK-NEXT: add.w r4, r4, lr, lsr #2
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: and r5, r4, #15
@ -210,9 +210,9 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32
; CHECK-NEXT: bic r4, r4, #3
; CHECK-NEXT: sub.w lr, r4, #4
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
; CHECK-NEXT: add.w r4, r4, lr, lsr #2
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r12
@ -309,9 +309,9 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32*
; CHECK-NEXT: bic r4, r4, #3
; CHECK-NEXT: sub.w lr, r4, #4
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
; CHECK-NEXT: add.w r4, r4, lr, lsr #2
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r12
@ -402,8 +402,8 @@ define dso_local void @continue_on_zero(i32* noalias nocapture %arg, i32* noalia
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB4_1: @ %bb3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB4_2: @ %bb9
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r3, #4
@ -464,8 +464,8 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB5_1: @ %bb4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB5_2: @ %bb12
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0]

View File

@ -10,11 +10,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <8 x i16>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <8 x i16>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -46,11 +46,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -82,11 +82,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -115,7 +115,7 @@
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1 immarg)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1 immarg)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1 immarg)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
@ -166,23 +166,23 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = t2DLS renamable $r12
; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4
; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2)
; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2)
; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg
; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1
; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg
@ -190,7 +190,7 @@ body: |
; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
@ -201,27 +201,27 @@ body: |
frame-setup CFI_INSTRUCTION offset $r4, -8
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
liveins: $r0, $r1, $r2, $r3
renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
renamable $lr = t2DoLoopStart killed renamable $lr
$r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r4
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
MVE_VPST 4, implicit $vpr
renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2)
renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2)
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1
renamable $lr = t2LoopDec killed renamable $lr, 1
$r0 = tMOVr $r1, 14 /* CC::al */, $noreg
@ -232,7 +232,7 @@ body: |
tB %bb.3, 14 /* CC::al */, $noreg
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
...
---
@ -267,68 +267,69 @@ body: |
; CHECK-LABEL: name: test_ctlz_i16
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = t2DLS renamable $r4
; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4
; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r12
; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1
; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
; CHECK: liveins: $r4
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
liveins: $r0, $r1, $r2, $r3, $r7, $lr
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r4, -8
frame-setup CFI_INSTRUCTION offset $r7, -8
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
liveins: $r0, $r1, $r2, $r3
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
renamable $lr = t2DoLoopStart killed renamable $lr
$r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r12
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
$lr = tMOVr $r12, 14 /* CC::al */, $noreg
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
MVE_VPST 4, implicit $vpr
renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
@ -338,7 +339,7 @@ body: |
tB %bb.3, 14 /* CC::al */, $noreg
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
...
---
@ -373,68 +374,69 @@ body: |
; CHECK-LABEL: name: test_ctlz_i32
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = t2DLS renamable $r4
; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4
; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r12
; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1
; CHECK: renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
; CHECK: liveins: $r4
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
liveins: $r0, $r1, $r2, $r3, $r7, $lr
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r4, -8
frame-setup CFI_INSTRUCTION offset $r7, -8
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
liveins: $r0, $r1, $r2, $r3
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
renamable $lr = t2DoLoopStart killed renamable $lr
$r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r12
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
$lr = tMOVr $r12, 14 /* CC::al */, $noreg
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
MVE_VPST 4, implicit $vpr
renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4)
renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4)
renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg
@ -444,6 +446,6 @@ body: |
tB %bb.3, 14 /* CC::al */, $noreg
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
...

View File

@ -19,7 +19,7 @@
br i1 %tmp, label %bb27, label %bb3
bb3: ; preds = %bb
call void @llvm.set.loop.iterations.i32(i32 %tmp6)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp6)
%scevgep1 = getelementptr i32, i32* %arg3, i32 -4
br label %bb9
@ -27,7 +27,7 @@
%lsr.iv4 = phi i32* [ %scevgep6, %bb9 ], [ %scevgep1, %bb3 ]
%lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
%lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ]
%tmp7 = phi i32 [ %tmp6, %bb3 ], [ %tmp12, %bb9 ]
%tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ]
%tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>*
@ -56,7 +56,7 @@
}
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
@ -197,7 +197,7 @@ body: |
VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0)
renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0
$r3 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.bb9:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -11,14 +11,14 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%5 = phi i32 [ %4, %entry ], [ %9, %do.body ]
%5 = phi i32 [ %start, %entry ], [ %9, %do.body ]
%6 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
%input_cast = bitcast float* %pSrc.addr.0 to <4 x float>*
%7 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %input_cast, i32 4, <4 x i1> %6, <4 x float> undef)
@ -38,7 +38,7 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -136,7 +136,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.do.body (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -19,14 +19,14 @@
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
@ -47,7 +47,7 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
@ -162,7 +162,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -24,14 +24,14 @@
%5 = sub i32 %3, %smin36
%6 = lshr i32 %5, 2
%7 = add nuw nsw i32 %6, 1
call void @llvm.set.loop.iterations.i32(i32 %7)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %7)
br label %do.body
do.body: ; preds = %do.body, %entry
%count.0 = phi i32 [ %0, %entry ], [ %12, %do.body ]
%pInT.0 = phi float* [ %pIn, %entry ], [ %add.ptr, %do.body ]
%sumVec.0 = phi <4 x float> [ zeroinitializer, %entry ], [ %11, %do.body ]
%8 = phi i32 [ %7, %entry ], [ %13, %do.body ]
%8 = phi i32 [ %start1, %entry ], [ %13, %do.body ]
%pInT.033 = bitcast float* %pInT.0 to <4 x float>*
%9 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.0)
%10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer)
@ -125,7 +125,7 @@
%50 = bitcast float* %arrayidx17 to <4 x float>*
%51 = load <4 x float>, <4 x float>* %50, align 4
%52 = fmul fast <4 x float> %51, %40
call void @llvm.set.loop.iterations.i32(i32 %33)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %33)
br label %do.body24
do.body24: ; preds = %do.body24, %for.body
@ -138,7 +138,7 @@
%sumVec1.0 = phi <4 x float> [ %46, %for.body ], [ %58, %do.body24 ]
%sumVec2.0 = phi <4 x float> [ %49, %for.body ], [ %60, %do.body24 ]
%sumVec3.0 = phi <4 x float> [ %52, %for.body ], [ %62, %do.body24 ]
%53 = phi i32 [ %33, %for.body ], [ %63, %do.body24 ]
%53 = phi i32 [ %start2, %for.body ], [ %63, %do.body24 ]
%lsr.iv4 = bitcast float* %lsr.iv to <4 x float>*
%lsr.iv911 = bitcast float* %lsr.iv9 to <4 x float>*
%lsr.iv1618 = bitcast float* %lsr.iv16 to <4 x float>*
@ -219,7 +219,7 @@
%k.1200 = phi i32 [ %inc, %do.end66 ], [ %k.0.lcssa, %for.body56.preheader ]
%mul57 = mul i32 %k.1200, %0
%arrayidx58 = getelementptr inbounds float, float* %2, i32 %mul57
call void @llvm.set.loop.iterations.i32(i32 %38)
%start3 = call i32 @llvm.start.loop.iterations.i32(i32 %38)
br label %do.body59
do.body59: ; preds = %do.body59, %for.body56
@ -227,7 +227,7 @@
%pInT.2 = phi float* [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ]
%pCos0.1 = phi float* [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ]
%sumVec.1 = phi <4 x float> [ zeroinitializer, %for.body56 ], [ %93, %do.body59 ]
%89 = phi i32 [ %38, %for.body56 ], [ %95, %do.body59 ]
%89 = phi i32 [ %start3, %for.body56 ], [ %95, %do.body59 ]
%pInT.21 = bitcast float* %pInT.2 to <4 x float>*
%pCos0.12 = bitcast float* %pCos0.1 to <4 x float>*
%90 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.2)
@ -264,7 +264,7 @@
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #3
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
declare void @llvm.set.loop.iterations.i32(i32) #4
declare i32 @llvm.start.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
...
@ -414,7 +414,7 @@ body: |
$r0 = tMOVr $r4, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
$r1 = tMOVr $r5, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.do.body (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
@ -503,7 +503,7 @@ body: |
$r3 = tMOVr $r10, 14 /* CC::al */, $noreg
$r5 = tMOVr $r1, 14 /* CC::al */, $noreg
$r4 = tMOVr $r12, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r7 = tMOVr $r6, 14 /* CC::al */, $noreg
renamable $r11 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %stack.5)
@ -592,7 +592,7 @@ body: |
$r6 = tMOVr $r4, 14 /* CC::al */, $noreg
$r7 = tMOVr $r5, 14 /* CC::al */, $noreg
$lr = tMOVr $r3, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
bb.13:
successors: %bb.10(0x80000000)

View File

@ -7,7 +7,7 @@
define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
%scevgep = getelementptr i32, i32* %a, i32 -1
%scevgep4 = getelementptr i32, i32* %c, i32 -1
%scevgep8 = getelementptr i32, i32* %b, i32 -1
@ -35,7 +35,7 @@
%lsr.iv9 = phi i32* [ %scevgep8, %entry ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %entry ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %entry ], [ %scevgep2, %for.body ]
%count = phi i32 [ %N, %entry ], [ %count.next, %for.body ]
%count = phi i32 [ %start, %entry ], [ %count.next, %for.body ]
br label %for.body
}
@ -43,7 +43,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -184,7 +184,7 @@ body: |
frame-setup CFI_INSTRUCTION offset $r7, -8
$sp = frame-setup tSUBspi $sp, 8, 14, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 40
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg

View File

@ -8,21 +8,21 @@ define void @foo(%struct.SpeexPreprocessState_* nocapture readonly %st, i16* %x)
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrd r12, r4, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r2, r3, [r0, #8]
; CHECK-NEXT: rsb r12, r12, r4, lsl #1
; CHECK-NEXT: dlstp.16 lr, r12
; CHECK-NEXT: mov r4, r12
; CHECK-NEXT: dlstp.16 lr, r4
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r2], #16
; CHECK-NEXT: vstrh.16 q0, [r3], #16
; CHECK-NEXT: vldrh.u16 q0, [r3], #16
; CHECK-NEXT: vstrh.16 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %do.end
; CHECK-NEXT: ldr r3, [r0]
; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: ldr r0, [r0, #8]
; CHECK-NEXT: vmov.i16 q0, #0x1800
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB0_3: @ %do.body6
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q1, [r1], #16

View File

@ -9,8 +9,8 @@ define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* no
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r3, #8
@ -69,8 +69,8 @@ define dso_local arm_aapcs_vfpcc void @zext_i8(i16* noalias nocapture %a, i8* no
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r3, #8
@ -129,8 +129,8 @@ define dso_local arm_aapcs_vfpcc void @sext_i16(i32* noalias nocapture %a, i16*
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r3, #4
@ -189,8 +189,8 @@ define dso_local arm_aapcs_vfpcc void @zext_i16(i32* noalias nocapture %a, i16*
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r3, #4

View File

@ -17,11 +17,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
@ -52,7 +52,7 @@
ret i32 %res.0.lcssa
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -155,7 +155,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14, $noreg
bb.2.vector.body:

View File

@ -49,10 +49,10 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: add.w r3, r12, r3, lsr #2
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB0_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r1, r3
@ -228,9 +228,9 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -321,11 +321,12 @@ define arm_aapcs_vfpcc float @fast_float_half_mac(half* nocapture readonly %b, h
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: sub.w r12, r2, #1
; CHECK-NEXT: adr r2, .LCPI2_1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: mov lr, r3
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q1, r12
; CHECK-NEXT: vdup.32 q2, r12
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill

View File

@ -13,14 +13,14 @@
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i16* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i16* %lsr.iv to <8 x i16>*
%lsr.iv1416 = bitcast i16* %lsr.iv14 to <8 x i16>*
@ -41,7 +41,7 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
@ -149,7 +149,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -20,14 +20,14 @@
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
@ -48,7 +48,7 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
@ -157,7 +157,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -13,14 +13,14 @@
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i8* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i8* %lsr.iv to <16 x i8>*
%lsr.iv1416 = bitcast i8* %lsr.iv14 to <16 x i8>*
@ -41,7 +41,7 @@
for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
@ -150,7 +150,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -16,11 +16,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
%lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
%lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
@ -64,7 +64,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -201,7 +201,7 @@ body: |
renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r4 = tMOVr killed $lr, 14, $noreg
bb.2.vector.body:

View File

@ -16,11 +16,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
%lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
%lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
@ -64,7 +64,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -201,7 +201,7 @@ body: |
renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r4 = tMOVr killed $lr, 14, $noreg
bb.2.vector.body:

View File

@ -16,11 +16,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ]
%lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
%lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
@ -64,7 +64,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -201,7 +201,7 @@ body: |
renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r4 = tMOVr killed $lr, 14, $noreg
bb.2.vector.body:

View File

@ -16,11 +16,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
%lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
@ -65,7 +65,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -200,7 +200,7 @@ body: |
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg
renamable $r5 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r5
$lr = t2DoLoopStart renamable $r5
$r4 = tMOVr killed $r5, 14, $noreg
bb.2.vector.body:

View File

@ -18,11 +18,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ]
%lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
@ -67,7 +67,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -201,7 +201,7 @@ body: |
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg
renamable $r5 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r5
$lr = t2DoLoopStart renamable $r5
$r4 = tMOVr killed $r5, 14, $noreg
bb.2.vector.body:

View File

@ -14,11 +14,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
@ -55,11 +55,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
@ -92,11 +92,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
@ -120,7 +120,7 @@
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -204,7 +204,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -321,7 +321,7 @@ body: |
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -440,7 +440,7 @@ body: |
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.2.vector.body:

View File

@ -12,11 +12,11 @@
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
store i32 %4, i32* %iter.addr, align 4
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %4, %entry ]
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
@ -47,12 +47,12 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
store i32 %4, i32* %iter.addr, align 4
br label %do.body
do.body: ; preds = %do.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %4, %entry ]
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
@ -84,7 +84,7 @@
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) #3
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #4
declare i32 @llvm.start.loop.iterations.i32(i32) #4
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
@ -178,7 +178,7 @@ body: |
renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
bb.1.do.body:
@ -247,8 +247,8 @@ body: |
; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
@ -282,8 +282,8 @@ body: |
renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $lr
t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
$lr = t2DoLoopStart renamable $lr
$r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
bb.1.do.body:

View File

@ -13,14 +13,14 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%5 = phi i32 [ %4, %entry ], [ %9, %do.body ]
%5 = phi i32 [ %start, %entry ], [ %9, %do.body ]
%6 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
%input_cast = bitcast float* %pSrc.addr.0 to <4 x float>*
%7 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %input_cast, i32 4, <4 x i1> %6, <4 x float> undef)
@ -40,7 +40,7 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -149,7 +149,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.do.body (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -14,14 +14,14 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%5 = phi i32 [ %4, %entry ], [ %9, %do.body ]
%5 = phi i32 [ %start, %entry ], [ %9, %do.body ]
%6 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
%input_cast = bitcast float* %pSrc.addr.0 to <4 x float>*
%7 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %input_cast, i32 4, <4 x i1> %6, <4 x float> undef)
@ -41,7 +41,7 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -140,7 +140,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.do.body (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -78,6 +78,7 @@ body: |
; CHECK: successors: %bb.5(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
; CHECK: dead $lr = tMOVr $r4, 14 /* CC::al */, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0
@ -151,7 +152,7 @@ body: |
renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
$r3 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.3:
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
@ -178,7 +179,7 @@ body: |
renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
$s2 = VMOVSR $r1, 14, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
t2DoLoopStart killed $r4
$lr = t2DoLoopStart killed $r4
renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0

View File

@ -15,14 +15,14 @@
%2 = sub i32 %0, %smin
%3 = lshr i32 %2, 2
%4 = add nuw nsw i32 %3, 1
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %entry
%blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
%pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
%5 = phi i32 [ %4, %entry ], [ %9, %do.body ]
%5 = phi i32 [ %start, %entry ], [ %9, %do.body ]
%6 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
%input_cast = bitcast float* %pSrc.addr.0 to <4 x float>*
%7 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %input_cast, i32 4, <4 x i1> %6, <4 x float> undef)
@ -42,7 +42,7 @@
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -149,7 +149,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r2 = tLEApcrel %const.0, 14, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.do.body (align 4):
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -20,11 +20,11 @@
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
%lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
@ -56,7 +56,7 @@
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -168,7 +168,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg
renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool)

View File

@ -18,11 +18,11 @@
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ]
%lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
@ -54,7 +54,7 @@
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
---
@ -165,7 +165,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg
renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool)

View File

@ -19,7 +19,7 @@
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -27,7 +27,7 @@
%lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
%elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %12, %vector.body ]
%lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>*
%lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>*
%7 = insertelement <4 x i32> undef, i32 %div, i32 0
@ -52,7 +52,7 @@
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -147,7 +147,7 @@ body: |
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool)
renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -14,11 +14,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
%lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
@ -50,7 +50,7 @@
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -136,7 +136,7 @@ body: |
renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $r4 = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r4
$lr = t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14, $noreg
bb.2.vector.body:

View File

@ -18,7 +18,7 @@
br i1 %tmp7, label %bb13, label %bb12
bb12: ; preds = %bb4
call void @llvm.set.loop.iterations.i32(i32 %tmp11)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp11)
br label %bb28
bb13: ; preds = %bb28, %bb4
@ -45,7 +45,7 @@
ret void
bb28: ; preds = %bb28, %bb12
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %tmp11, %bb12 ]
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
%0 = bitcast i32* %arg1 to i8*
@ -145,7 +145,7 @@
br label %bb27
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -387,7 +387,7 @@ body: |
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg
renamable $r8 = t2MOVi 0, 14, $noreg, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r12 = tMOVr killed $r3, 14, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg

View File

@ -18,7 +18,7 @@
br i1 %tmp7, label %bb13, label %bb12
bb12: ; preds = %bb4
call void @llvm.set.loop.iterations.i32(i32 %tmp11)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp11)
br label %bb28
bb13: ; preds = %bb28, %bb4
@ -46,7 +46,7 @@
ret i32 %res
bb28: ; preds = %bb28, %bb12
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %tmp11, %bb12 ]
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
%0 = bitcast i32* %arg1 to i8*
@ -146,7 +146,7 @@
br label %bb27
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -265,7 +265,8 @@ body: |
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $r3
; CHECK: dead $lr = t2DLS renamable $r3
; CHECK: $lr = tMOVr killed $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: bb.5.bb28:
; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000)
@ -403,7 +404,7 @@ body: |
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg
renamable $r8 = t2MOVi 0, 14, $noreg, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$lr = tMOVr killed $r3, 14, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg

View File

@ -18,7 +18,7 @@
br i1 %tmp7, label %bb13, label %bb12
bb12: ; preds = %bb4
call void @llvm.set.loop.iterations.i32(i32 %tmp11)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp11)
br label %bb28
bb13: ; preds = %bb28, %bb4
@ -46,7 +46,7 @@
ret i32 %res
bb28: ; preds = %bb28, %bb12
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %tmp11, %bb12 ]
%lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ]
%lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ]
%tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ]
%0 = bitcast i32* %arg1 to i8*
@ -146,7 +146,7 @@
br label %bb27
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -265,7 +265,8 @@ body: |
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $r3
; CHECK: dead $lr = t2DLS renamable $r3
; CHECK: $lr = tMOVr killed $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: bb.5.bb28:
; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000)
@ -403,7 +404,7 @@ body: |
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg
renamable $r8 = t2MOVi 0, 14, $noreg, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$lr = tMOVr $r3, 14, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg

View File

@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL
@ -16,10 +15,10 @@
; CHECK: ne_and_guard
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tCMPi8 renamable $r0, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = t2DLS killed renamable $r0
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
@ -49,10 +48,10 @@ if.end: ; preds = %while.body, %entry
; CHECK: ne_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tCMPi8 renamable $r0, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = t2DLS killed renamable $r0
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
@ -84,10 +83,10 @@ if.end: ; preds = %while.body, %while.
; CHECK: eq_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tCMPi8 renamable $r0, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = t2DLS killed renamable $r0
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
@ -119,10 +118,10 @@ if.end: ; preds = %while.body, %while.
; CHECK: ne_prepreheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: t2CMPri renamable $r12, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = t2DLS killed renamable $r12
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
@ -153,7 +152,7 @@ if.end: ; preds = %while.body, %while.
; CHECK: be_ne
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = t2DLS killed renamable $r12
; CHECK: bb.2.do.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {

View File

@ -15,14 +15,14 @@
vector.ph: ; preds = %entry
%6 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %init, i32 0
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv13 = phi float* [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %5, %vector.ph ], [ %14, %vector.body ]
%7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %N, %vector.ph ], [ %10, %vector.body ]
%lsr.iv12 = bitcast float* %lsr.iv to <4 x float>*
%lsr.iv1315 = bitcast float* %lsr.iv13 to <4 x float>*
@ -63,14 +63,14 @@
vector.ph: ; preds = %entry
%6 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %init, i32 0
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi float* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %5, %vector.ph ], [ %14, %vector.body ]
%7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %shr, %vector.ph ], [ %10, %vector.body ]
%lsr.iv13 = bitcast float* %lsr.iv to <4 x float>*
%lsr.iv1416 = bitcast float* %lsr.iv14 to <4 x float>*
@ -99,7 +99,7 @@
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -205,7 +205,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
$s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1
@ -341,7 +341,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg
$s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1

View File

@ -16,7 +16,7 @@
%scevgep = getelementptr i32, i32* %a, i32 -1
%scevgep4 = getelementptr i32, i32* %c, i32 -1
%scevgep8 = getelementptr i32, i32* %b, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
@ -26,7 +26,7 @@
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
%0 = phi i32 [ %start, %for.body.preheader ], [ %3, %for.body ]
%size = call i32 @llvm.arm.space(i32 4096, i32 undef)
%scevgep3 = getelementptr i32, i32* %lsr.iv9, i32 1
%1 = load i32, i32* %scevgep3, align 4
@ -47,7 +47,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -157,7 +157,7 @@ body: |
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
$lr = t2DoLoopStart killed $r3
bb.2.for.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -26,14 +26,14 @@
call void @llvm.dbg.value(metadata i32 0, metadata !31, metadata !DIExpression()), !dbg !32
%arrayidx7.us = getelementptr inbounds i32, i32* %e, i32 %i.031.us, !dbg !38
%arrayidx7.promoted.us = load i32, i32* %arrayidx7.us, align 4, !dbg !41
call void @llvm.set.loop.iterations.i32(i32 %d), !dbg !46
%start = call i32 @llvm.start.loop.iterations.i32(i32 %d), !dbg !46
br label %for.body3.us, !dbg !46
for.body3.us: ; preds = %for.body3.us, %for.cond1.preheader.us
%lsr.iv5 = phi i16* [ %scevgep6, %for.body3.us ], [ %lsr.iv2, %for.cond1.preheader.us ], !dbg !32
%lsr.iv1 = phi i16* [ %scevgep, %for.body3.us ], [ %l, %for.cond1.preheader.us ], !dbg !32
%add829.us = phi i32 [ %arrayidx7.promoted.us, %for.cond1.preheader.us ], [ %add8.us, %for.body3.us ], !dbg !32
%1 = phi i32 [ %d, %for.cond1.preheader.us ], [ %4, %for.body3.us ], !dbg !32
%1 = phi i32 [ %start, %for.cond1.preheader.us ], [ %4, %for.body3.us ], !dbg !32
call void @llvm.dbg.value(metadata i32 undef, metadata !31, metadata !DIExpression()), !dbg !32
%2 = load i16, i16* %lsr.iv5, align 2, !dbg !47
%conv.us = sext i16 %2 to i32, !dbg !47
@ -67,7 +67,7 @@
}
declare !dbg !4 dso_local arm_aapcscc signext i16 @get_input(i32, i32*, i16 signext)
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
!llvm.dbg.cu = !{!0}
@ -325,7 +325,7 @@ body: |
$r3 = tMOVr $r5, 14, $noreg, debug-location !32
$r0 = tMOVr $r8, 14, $noreg, debug-location !32
$lr = tMOVr $r10, 14, $noreg, debug-location !32
t2DoLoopStart renamable $r10, debug-location !46
$lr = t2DoLoopStart renamable $r10, debug-location !46
bb.3.for.body3.us:
successors: %bb.3(0x7c000000), %bb.4(0x04000000)

View File

@ -10,7 +10,7 @@
br i1 %cmp19.i, label %for.body.i.preheader, label %c.exit.thread
for.body.i.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %d)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %d)
br label %for.body.i
c.exit.thread: ; preds = %entry
@ -22,7 +22,7 @@
%lsr.iv15 = phi i32* [ %e, %for.body.i.preheader ], [ %scevgep16, %for.body.i ]
%h.022.i = phi i16 [ %h.1.i, %for.body.i ], [ 0, %for.body.i.preheader ]
%f.020.i = phi i32 [ %f.1.i, %for.body.i ], [ undef, %for.body.i.preheader ]
%0 = phi i32 [ %d, %for.body.i.preheader ], [ %2, %for.body.i ]
%0 = phi i32 [ %start1, %for.body.i.preheader ], [ %2, %for.body.i ]
%1 = load i32, i32* %lsr.iv15, align 4
%add.i = add nsw i32 %1, %f.020.i
%cmp1.i = icmp sgt i32 %add.i, 0
@ -60,14 +60,14 @@
%arrayidx12.us = getelementptr inbounds i32, i32* %e, i32 %i.064.us
%arrayidx12.promoted.us = load i32, i32* %arrayidx12.us, align 4
%11 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx12.promoted.us, i32 0
call void @llvm.set.loop.iterations.i32(i32 %8)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %8)
br label %vector.body
vector.body: ; preds = %vector.body, %for.cond4.preheader.us
%lsr.iv10 = phi i16* [ %scevgep11, %vector.body ], [ %lsr.iv7, %for.cond4.preheader.us ]
%lsr.iv4 = phi i16* [ %scevgep5, %vector.body ], [ %l, %for.cond4.preheader.us ]
%vec.phi = phi <4 x i32> [ %11, %for.cond4.preheader.us ], [ %19, %vector.body ]
%12 = phi i32 [ %8, %for.cond4.preheader.us ], [ %20, %vector.body ]
%12 = phi i32 [ %start2, %for.cond4.preheader.us ], [ %20, %vector.body ]
%13 = phi i32 [ %d, %for.cond4.preheader.us ], [ %15, %vector.body ]
%lsr.iv1012 = bitcast i16* %lsr.iv10 to <4 x i16>*
%lsr.iv46 = bitcast i16* %lsr.iv4 to <4 x i16>*
@ -108,14 +108,14 @@
br i1 %29, label %for.body.i57.preheader, label %c.exit59
for.body.i57.preheader: ; preds = %for.end16
call void @llvm.set.loop.iterations.i32(i32 %d)
%start3 = call i32 @llvm.start.loop.iterations.i32(i32 %d)
br label %for.body.i57
for.body.i57: ; preds = %for.body.i57, %for.body.i57.preheader
%lsr.iv1 = phi i32* [ %e, %for.body.i57.preheader ], [ %scevgep, %for.body.i57 ]
%h.022.i44 = phi i16 [ %h.1.i54, %for.body.i57 ], [ 0, %for.body.i57.preheader ]
%f.020.i46 = phi i32 [ %f.1.i51, %for.body.i57 ], [ undef, %for.body.i57.preheader ]
%30 = phi i32 [ %d, %for.body.i57.preheader ], [ %32, %for.body.i57 ]
%30 = phi i32 [ %start3, %for.body.i57.preheader ], [ %32, %for.body.i57 ]
%31 = load i32, i32* %lsr.iv1, align 4
%add.i48 = add nsw i32 %31, %f.020.i46
%cmp1.i49 = icmp sgt i32 %add.i48, 0
@ -142,7 +142,7 @@
declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -385,7 +385,7 @@ body: |
renamable $r2 = IMPLICIT_DEF
$r10 = tMOVr $r0, 14, $noreg
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed renamable $r0
$lr = t2DoLoopStart killed renamable $r0
bb.2.for.body.i:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -443,7 +443,7 @@ body: |
$r6 = tMOVr $r5, 14, $noreg
$r1 = tMOVr $r8, 14, $noreg
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $r0
$lr = t2DoLoopStart renamable $r0
bb.6.vector.body:
successors: %bb.6(0x7c000000), %bb.7(0x04000000)
@ -488,7 +488,7 @@ body: |
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r1 = IMPLICIT_DEF
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.10.for.body.i57:
successors: %bb.10(0x7c000000), %bb.11(0x04000000)

View File

@ -9,13 +9,13 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
%0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
@ -30,7 +30,7 @@
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
@ -91,7 +91,8 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: $lr = t2DLS killed $r0
; CHECK: dead $lr = t2DLS $r0
; CHECK: $lr = tMOVr killed $r0, 14 /* CC::al */, $noreg
; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: bb.1.while.body:
@ -111,7 +112,7 @@ body: |
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
$lr = t2DoLoopStart $r0
$lr = tMOVr killed $r0, 14, $noreg
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg

View File

@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops -tail-predication=enabled %s -o - | FileCheck %s
# TODO: As far as I can tell this test is fine. Tail-predicating the second loop means we remove the instruction that would otherwise block the first.
--- |
define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 {
entry:
@ -15,14 +17,14 @@
%6 = sub i32 %0, %smin3
%7 = lshr i32 %6, 2
%8 = add nuw nsw i32 %7, 1
call void @llvm.set.loop.iterations.i32(i32 %8)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %8)
br label %do.body.i
do.body.i: ; preds = %do.body.i, %entry
%blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ]
%sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ]
%pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
%9 = phi i32 [ %8, %entry ], [ %14, %do.body.i ]
%9 = phi i32 [ %start1, %entry ], [ %14, %do.body.i ]
%pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>*
%10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i)
%11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer)
@ -42,14 +44,14 @@
%18 = insertelement <4 x i32> undef, i32 %17, i64 0
%19 = shufflevector <4 x i32> %18, <4 x i32> undef, <4 x i32> zeroinitializer
%20 = bitcast <4 x i32> %19 to <4 x float>
call void @llvm.set.loop.iterations.i32(i32 %4)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %do.body
do.body: ; preds = %do.body, %arm_mean_f32_mve.exit
%blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ]
%sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ]
%pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
%21 = phi i32 [ %4, %arm_mean_f32_mve.exit ], [ %27, %do.body ]
%21 = phi i32 [ %start2, %arm_mean_f32_mve.exit ], [ %27, %do.body ]
%pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>*
%22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
%23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer)
@ -87,7 +89,7 @@
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #3
@ -152,32 +154,22 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 10, 8, implicit-def $itstate
; CHECK: renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
; CHECK: renamable $r12 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body.i:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.arm_mean_f32_mve.exit:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
; CHECK: dead $lr = tMOVr $r4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0
; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
@ -224,7 +216,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
$r3 = tMOVr $r1, 14 /* CC::al */, $noreg
$r12 = tMOVr $r0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r4 = tMOVr $lr, 14 /* CC::al */, $noreg
bb.1.do.body.i:
@ -247,7 +239,7 @@ body: |
$s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
t2DoLoopStart killed $r4
$lr = t2DoLoopStart killed $r4
renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg

View File

@ -14,13 +14,13 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
@ -46,7 +46,7 @@
}
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
...
@ -153,7 +153,7 @@ body: |
renamable $r5 = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r12 = t2LSRri killed renamable $r3, 1, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart renamable $r5
$lr = t2DoLoopStart renamable $r5
$lr = tMOVr killed $r5, 14, $noreg
bb.2.vector.body:

View File

@ -6,35 +6,31 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: cmp r1, #4
; CHECK-NEXT: it ge
; CHECK-NEXT: movge r3, #4
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: subs r3, r1, r3
; CHECK-NEXT: movge r4, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r4, r1, r4
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: adds r3, #3
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: adds r4, #3
; CHECK-NEXT: add.w r12, r3, r4, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q1, [r12], #16
; CHECK-NEXT: vaddt.f32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: vldrw.u32 q1, [r4], #16
; CHECK-NEXT: vadd.f32 q0, q0, q1
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: dls lr, r12
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: vcvt.f32.u32 s4, s4
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vdiv.f32 s0, s0, s4
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@ -42,7 +38,7 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrwt.u32 q1, [r0], #16
; CHECK-NEXT: vsubt.f32 q1, q1, r12
; CHECK-NEXT: vsubt.f32 q1, q1, r4
; CHECK-NEXT: vfmat.f32 q0, q1, q1
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end

View File

@ -18,13 +18,13 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
@ -50,7 +50,7 @@
}
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -169,7 +169,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
$r12 = t2MOVr killed $r3, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg

View File

@ -18,13 +18,13 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
@ -50,7 +50,7 @@
}
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -168,7 +168,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r12 = t2MOVr killed $r3, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg

View File

@ -36,17 +36,17 @@
br i1 %26, label %49, label %31
31: ; preds = %23
call void @llvm.set.loop.iterations.i32(i32 %30)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %30)
br label %65
32: ; preds = %11
call void @llvm.set.loop.iterations.i32(i32 %22)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %22)
br label %33
33: ; preds = %33, %32
%34 = phi i32* [ %46, %33 ], [ %0, %32 ]
%35 = phi i32* [ %45, %33 ], [ %1, %32 ]
%36 = phi i32 [ %22, %32 ], [ %47, %33 ]
%36 = phi i32 [ %start2, %32 ], [ %47, %33 ]
%37 = phi i32 [ %9, %32 ], [ %41, %33 ]
%38 = bitcast i32* %34 to <4 x i32>*
%39 = bitcast i32* %35 to <4 x i32>*
@ -89,7 +89,7 @@
65: ; preds = %65, %31
%66 = phi i32 [ %108, %65 ], [ 0, %31 ]
%67 = phi i32 [ 0, %31 ], [ %107, %65 ]
%68 = phi i32 [ %30, %31 ], [ %109, %65 ]
%68 = phi i32 [ %start1, %31 ], [ %109, %65 ]
%69 = bitcast i32* %0 to i8*
%70 = bitcast i32* %1 to i8*
%71 = getelementptr i8, i8* %70, i32 %66
@ -141,7 +141,7 @@
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -353,7 +353,7 @@ body: |
renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
$r2 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.3 (%ir-block.33):
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
@ -402,7 +402,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8 (%ir-block.65):
successors: %bb.8(0x7c000000), %bb.9(0x04000000)

View File

@ -18,13 +18,13 @@
br i1 %10, label %34, label %17
17: ; preds = %4
call void @llvm.set.loop.iterations.i32(i32 %16)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %16)
br label %18
18: ; preds = %18, %17
%19 = phi i32* [ %31, %18 ], [ %0, %17 ]
%20 = phi i32* [ %30, %18 ], [ %1, %17 ]
%21 = phi i32 [ %16, %17 ], [ %32, %18 ]
%21 = phi i32 [ %start, %17 ], [ %32, %18 ]
%22 = phi i32 [ %9, %17 ], [ %26, %18 ]
%23 = bitcast i32* %19 to <4 x i32>*
%24 = bitcast i32* %20 to <4 x i32>*
@ -45,7 +45,7 @@
}
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -143,7 +143,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
$r3 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2 (%ir-block.18):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -8,7 +8,7 @@
br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body
for.cond.cleanup: ; preds = %for.end, %entry
@ -18,7 +18,7 @@
%lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
%lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
%lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
%lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
%lsr.iv = phi i32 [ %start, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
%size = call i32 @llvm.arm.space(i32 3072, i32 undef)
%0 = load i32, i32* %lsr.iv4, align 4
%1 = load i32, i32* %lsr.iv2, align 4
@ -46,7 +46,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -166,7 +166,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
$lr = t2DoLoopStart killed $r3
tB %bb.2, 14, $noreg
bb.2.for.end:

View File

@ -14,14 +14,14 @@
br i1 %cmp30, label %for.cond.cleanup6, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
%lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
%lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
%lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
@ -50,14 +50,14 @@
br i1 %13, label %for.cond.cleanup6, label %vector.ph39
vector.ph39: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %19)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %19)
br label %vector.body38
vector.body38: ; preds = %vector.body38, %vector.ph39
%lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
%lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%20 = phi i32 [ %19, %vector.ph39 ], [ %26, %vector.body38 ]
%20 = phi i32 [ %start2, %vector.ph39 ], [ %26, %vector.body38 ]
%21 = phi i32 [ %N, %vector.ph39 ], [ %23, %vector.body38 ]
%lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
%lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
@ -94,14 +94,14 @@
br i1 %cmp30, label %for.cond4.preheader, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
%lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
%lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
%lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
@ -130,14 +130,14 @@
br i1 %cmp528, label %for.cond.cleanup6, label %vector.ph39
vector.ph39: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %18)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %18)
br label %vector.body38
vector.body38: ; preds = %vector.body38, %vector.ph39
%lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
%lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%19 = phi i32 [ %18, %vector.ph39 ], [ %25, %vector.body38 ]
%19 = phi i32 [ %start2, %vector.ph39 ], [ %25, %vector.body38 ]
%20 = phi i32 [ %N, %vector.ph39 ], [ %22, %vector.body38 ]
%lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
%lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
@ -173,14 +173,14 @@
br i1 %cmp54, label %for.cond.cleanup17, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv123 = phi i32* [ %scevgep124, %vector.body ], [ %a, %vector.ph ]
%lsr.iv120 = phi i32* [ %scevgep121, %vector.body ], [ %c, %vector.ph ]
%lsr.iv117 = phi i32* [ %scevgep118, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv123125 = bitcast i32* %lsr.iv123 to <4 x i32>*
%lsr.iv120122 = bitcast i32* %lsr.iv120 to <4 x i32>*
@ -210,14 +210,14 @@
br i1 %cmp552, label %for.cond15.preheader, label %vector.ph66
vector.ph66: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %18)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %18)
br label %vector.body65
vector.body65: ; preds = %vector.body65, %vector.ph66
%lsr.iv114 = phi i32* [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ]
%lsr.iv111 = phi i32* [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ]
%lsr.iv108 = phi i32* [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ]
%19 = phi i32 [ %18, %vector.ph66 ], [ %25, %vector.body65 ]
%19 = phi i32 [ %start2, %vector.ph66 ], [ %25, %vector.body65 ]
%20 = phi i32 [ %div, %vector.ph66 ], [ %22, %vector.body65 ]
%lsr.iv114116 = bitcast i32* %lsr.iv114 to <4 x i32>*
%lsr.iv111113 = bitcast i32* %lsr.iv111 to <4 x i32>*
@ -248,14 +248,14 @@
br i1 %27, label %for.cond.cleanup17, label %vector.ph85
vector.ph85: ; preds = %for.cond15.preheader
call void @llvm.set.loop.iterations.i32(i32 %33)
%start3 = call i32 @llvm.start.loop.iterations.i32(i32 %33)
br label %vector.body84
vector.body84: ; preds = %vector.body84, %vector.ph85
%lsr.iv105 = phi i32* [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ]
%lsr.iv102 = phi i32* [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ]
%34 = phi i32 [ %33, %vector.ph85 ], [ %40, %vector.body84 ]
%34 = phi i32 [ %start3, %vector.ph85 ], [ %40, %vector.body84 ]
%35 = phi i32 [ %N, %vector.ph85 ], [ %37, %vector.body84 ]
%lsr.iv105107 = bitcast i32* %lsr.iv105 to <4 x i32>*
%lsr.iv102104 = bitcast i32* %lsr.iv102 to <4 x i32>*
@ -280,7 +280,7 @@
}
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -431,7 +431,7 @@ body: |
$r4 = tMOVr $r3, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, renamable $r12, 19, 14, $noreg, $noreg
$r6 = tMOVr $r1, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -462,7 +462,7 @@ body: |
renamable $r6, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, killed renamable $r12, 19, 14, $noreg, $noreg
$r12 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body38:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
@ -637,7 +637,7 @@ body: |
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14, $noreg
renamable $lr = nuw nsw t2ADDrs renamable $r12, killed renamable $r6, 19, 14, $noreg, $noreg
$r6 = tMOVr $r2, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -670,7 +670,7 @@ body: |
renamable $r6 = t2BICri killed renamable $r6, 3, 14, $noreg, $noreg
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r6, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body38:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
@ -878,7 +878,7 @@ body: |
$r4 = tMOVr $r3, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, renamable $r12, 19, 14, $noreg, $noreg
$r6 = tMOVr $r1, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -919,7 +919,7 @@ body: |
$r4 = tMOVr $r1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs renamable $r8, killed renamable $r6, 19, 14, $noreg, $noreg
$r6 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body65:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
@ -952,7 +952,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r8, killed renamable $r12, 19, 14, $noreg, $noreg
$r5 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8.vector.body84:
successors: %bb.8(0x7c000000), %bb.9(0x04000000)

View File

@ -92,9 +92,9 @@ define arm_aapcs_vfpcc void @float_float_mul(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB0_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
@ -311,9 +311,9 @@ define arm_aapcs_vfpcc void @float_float_add(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB1_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
@ -530,9 +530,9 @@ define arm_aapcs_vfpcc void @float_float_sub(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB2_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
@ -680,9 +680,9 @@ define arm_aapcs_vfpcc void @float_int_mul(float* nocapture readonly %a, i32* no
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB3_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
@ -889,10 +889,10 @@ define arm_aapcs_vfpcc void @float_int_int_mul(i32* nocapture readonly %a, i32*
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB4_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r4], #16
@ -906,11 +906,11 @@ define arm_aapcs_vfpcc void @float_int_int_mul(i32* nocapture readonly %a, i32*
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, r5, r6, pc}
; CHECK-NEXT: .LBB4_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB4_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r3, [r0], #4
@ -994,10 +994,10 @@ define arm_aapcs_vfpcc void @half_half_mul(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
@ -1021,11 +1021,11 @@ define arm_aapcs_vfpcc void @half_half_mul(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB5_8
; CHECK-NEXT: .LBB5_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB5_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
@ -1111,10 +1111,10 @@ define arm_aapcs_vfpcc void @half_half_add(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB6_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
@ -1138,11 +1138,11 @@ define arm_aapcs_vfpcc void @half_half_add(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB6_8
; CHECK-NEXT: .LBB6_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB6_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
@ -1228,10 +1228,10 @@ define arm_aapcs_vfpcc void @half_half_sub(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB7_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
@ -1255,11 +1255,11 @@ define arm_aapcs_vfpcc void @half_half_sub(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB7_8
; CHECK-NEXT: .LBB7_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB7_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
@ -1345,10 +1345,10 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB8_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u32 q0, [r5], #8
@ -1377,11 +1377,11 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB8_8
; CHECK-NEXT: .LBB8_6: @ %for.body.preheader13
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB8_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r3, [r1], #2
@ -1476,9 +1476,9 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: .LBB9_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
@ -1633,9 +1633,9 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: .LBB10_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
@ -1790,10 +1790,10 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: adds r3, r1, #4
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: adds r2, r0, #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB11_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2]

View File

@ -15,9 +15,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -91,9 +91,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -167,9 +167,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -243,9 +243,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -319,9 +319,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
@ -430,10 +430,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: adds r5, r0, #3
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: adds r6, r1, #1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrb r8, [r5, #-3]
@ -624,8 +624,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB6_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
@ -732,10 +732,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: adds r5, r0, #3
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: adds r6, r1, #1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB7_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrb r8, [r5, #-3]
@ -926,8 +926,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
@ -1034,10 +1034,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: add.w r5, r0, #8
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: add.w r6, r1, #8
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB9_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r8, [r5, #-8]
@ -1214,8 +1214,8 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB10_1: @ %vector.ph
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB10_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add.w r12, r12, #8

View File

@ -12,47 +12,47 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP:%.*]] = add i32 [[N_VEC]], -4
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4
; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2
; CHECK-NEXT: [[TT2:%.*]] = add nuw nsw i32 [[TT1]], 1
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:
; CHECK-NEXT: [[I_025_US:%.*]] = phi i32 [ [[INC10_US:%.*]], [[MIDDLE_BLOCK:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i16*, i16** [[A:%.*]], i32 [[I_025_US]]
; CHECK-NEXT: [[TMP3:%.*]] = load i16*, i16** [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[TT3:%.*]] = load i16*, i16** [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[I_025_US]]
; CHECK-NEXT: [[ARRAYIDX8_PROMOTED_US:%.*]] = load i32, i32* [[ARRAYIDX8_US]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX8_PROMOTED_US]], i32 0
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TT4:%.*]] = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX8_PROMOTED_US]], i32 0
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TT2]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP2]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT15:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[INDEX]]
; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i16, i16* [[TT3]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP8]], i32 2, <4 x i1> [[TMP1]], <4 x i16> undef)
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP11]], i32 2, <4 x i1> [[TMP1]], <4 x i16> undef)
; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD30]] to <4 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP9]]
; CHECK-NEXT: [[TMP14]] = add nsw <4 x i32> [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT: [[TT8:%.*]] = bitcast i16* [[TT6]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TT8]], i32 2, <4 x i1> [[TMP1]], <4 x i16> undef)
; CHECK-NEXT: [[TT9:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32>
; CHECK-NEXT: [[TT10:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TT11:%.*]] = bitcast i16* [[TT10]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TT11]], i32 2, <4 x i1> [[TMP1]], <4 x i16> undef)
; CHECK-NEXT: [[TT12:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD30]] to <4 x i32>
; CHECK-NEXT: [[TT13:%.*]] = mul nsw <4 x i32> [[TT12]], [[TT9]]
; CHECK-NEXT: [[TT14]] = add nsw <4 x i32> [[TT13]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1)
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
; CHECK-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK-NEXT: [[TT15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TT5]], i32 1)
; CHECK-NEXT: [[TT16:%.*]] = icmp ne i32 [[TT15]], 0
; CHECK-NEXT: br i1 [[TT16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP14]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
; CHECK-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8_US]], align 4
; CHECK-NEXT: [[TT17:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TT14]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TT18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TT17]])
; CHECK-NEXT: store i32 [[TT18]], i32* [[ARRAYIDX8_US]], align 4
; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[I_025_US]], 1
; CHECK-NEXT: [[EXITCOND27:%.*]] = icmp eq i32 [[INC10_US]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND27]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
@ -69,51 +69,51 @@ for.cond1.preheader.us.preheader: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert28 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat29 = shufflevector <4 x i32> %broadcast.splatinsert28, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp = add i32 %n.vec, -4
%tmp1 = lshr i32 %tmp, 2
%tmp2 = add nuw nsw i32 %tmp1, 1
%tt = add i32 %n.vec, -4
%tt1 = lshr i32 %tt, 2
%tt2 = add nuw nsw i32 %tt1, 1
br label %for.cond1.preheader.us
for.cond1.preheader.us: ; preds = %middle.block, %for.cond1.preheader.us.preheader
%i.025.us = phi i32 [ %inc10.us, %middle.block ], [ 0, %for.cond1.preheader.us.preheader ]
%arrayidx.us = getelementptr inbounds i16*, i16** %A, i32 %i.025.us
%tmp3 = load i16*, i16** %arrayidx.us, align 4
%tt3 = load i16*, i16** %arrayidx.us, align 4
%arrayidx8.us = getelementptr inbounds i32, i32* %C, i32 %i.025.us
%arrayidx8.promoted.us = load i32, i32* %arrayidx8.us, align 4
%tmp4 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx8.promoted.us, i32 0
call void @llvm.set.loop.iterations.i32(i32 %tmp2)
%tt4 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx8.promoted.us, i32 0
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tt2)
br label %vector.body
vector.body: ; preds = %vector.body, %for.cond1.preheader.us
%index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ %tmp4, %for.cond1.preheader.us ], [ %tmp14, %vector.body ]
%tmp5 = phi i32 [ %tmp2, %for.cond1.preheader.us ], [ %tmp15, %vector.body ]
%vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt14, %vector.body ]
%tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ %tt15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
%tmp6 = getelementptr inbounds i16, i16* %tmp3, i32 %index
%tt6 = getelementptr inbounds i16, i16* %tt3, i32 %index
; %tmp7 = icmp ule <4 x i32> %induction, %broadcast.splat29
%tmp7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat29
%tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
%tmp8 = bitcast i16* %tmp6 to <4 x i16>*
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp8, i32 2, <4 x i1> %tmp7, <4 x i16> undef)
%tmp9 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp10 = getelementptr inbounds i16, i16* %B, i32 %index
%tmp11 = bitcast i16* %tmp10 to <4 x i16>*
%wide.masked.load30 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp11, i32 2, <4 x i1> %tmp7, <4 x i16> undef)
%tmp12 = sext <4 x i16> %wide.masked.load30 to <4 x i32>
%tmp13 = mul nsw <4 x i32> %tmp12, %tmp9
%tmp14 = add nsw <4 x i32> %tmp13, %vec.phi
%tt8 = bitcast i16* %tt6 to <4 x i16>*
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tt8, i32 2, <4 x i1> %tt7, <4 x i16> undef)
%tt9 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tt10 = getelementptr inbounds i16, i16* %B, i32 %index
%tt11 = bitcast i16* %tt10 to <4 x i16>*
%wide.masked.load30 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tt11, i32 2, <4 x i1> %tt7, <4 x i16> undef)
%tt12 = sext <4 x i16> %wide.masked.load30 to <4 x i32>
%tt13 = mul nsw <4 x i32> %tt12, %tt9
%tt14 = add nsw <4 x i32> %tt13, %vec.phi
%index.next = add i32 %index, 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32(i32 %tmp5, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
br i1 %tmp16, label %vector.body, label %middle.block
%tt15 = call i32 @llvm.loop.decrement.reg.i32(i32 %tt5, i32 1)
%tt16 = icmp ne i32 %tt15, 0
br i1 %tt16, label %vector.body, label %middle.block
middle.block: ; preds = %vector.body
%tmp17 = select <4 x i1> %tmp7, <4 x i32> %tmp14, <4 x i32> %vec.phi
%tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17)
store i32 %tmp18, i32* %arrayidx8.us, align 4
%tt17 = select <4 x i1> %tt7, <4 x i32> %tt14, <4 x i32> %vec.phi
%tt18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tt17)
store i32 %tt18, i32* %arrayidx8.us, align 4
%inc10.us = add nuw i32 %i.025.us, 1
%exitcond27 = icmp eq i32 %inc10.us, %N
br i1 %exitcond27, label %for.cond.cleanup, label %for.cond1.preheader.us
@ -133,45 +133,45 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP:%.*]] = add i32 [[N_VEC]], -4
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4
; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2
; CHECK-NEXT: [[TT2:%.*]] = add nuw nsw i32 [[TT1]], 1
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:
; CHECK-NEXT: [[I_024_US:%.*]] = phi i32 [ [[INC9_US:%.*]], [[MIDDLE_BLOCK:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i32 [[I_024_US]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[TT3:%.*]] = load i32*, i32** [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[ARRAYIDX7_US:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[I_024_US]]
; CHECK-NEXT: [[ARRAYIDX7_PROMOTED_US:%.*]] = load i32, i32* [[ARRAYIDX7_US]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX7_PROMOTED_US]], i32 0
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TT4:%.*]] = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX7_PROMOTED_US]], i32 0
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TT2]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP2]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 [[INDEX]]
; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i32, i32* [[TT3]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP8]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP10]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD29]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP12]] = add nsw <4 x i32> [[VEC_PHI]], [[TMP11]]
; CHECK-NEXT: [[TT8:%.*]] = bitcast i32* [[TT6]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TT8]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
; CHECK-NEXT: [[TT9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TT10:%.*]] = bitcast i32* [[TT9]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TT10]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
; CHECK-NEXT: [[TT11:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD29]], [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TT12]] = add nsw <4 x i32> [[VEC_PHI]], [[TT11]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP13]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1)
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK-NEXT: [[TT13]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TT5]], i32 1)
; CHECK-NEXT: [[TT14:%.*]] = icmp ne i32 [[TT13]], 0
; CHECK-NEXT: br i1 [[TT14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP12]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
; CHECK-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX7_US]], align 4
; CHECK-NEXT: [[TT15:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TT12]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TT16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TT15]])
; CHECK-NEXT: store i32 [[TT16]], i32* [[ARRAYIDX7_US]], align 4
; CHECK-NEXT: [[INC9_US]] = add nuw i32 [[I_024_US]], 1
; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i32 [[INC9_US]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
@ -188,49 +188,49 @@ for.cond1.preheader.us.preheader: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert27 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat28 = shufflevector <4 x i32> %broadcast.splatinsert27, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp = add i32 %n.vec, -4
%tmp1 = lshr i32 %tmp, 2
%tmp2 = add nuw nsw i32 %tmp1, 1
%tt = add i32 %n.vec, -4
%tt1 = lshr i32 %tt, 2
%tt2 = add nuw nsw i32 %tt1, 1
br label %for.cond1.preheader.us
for.cond1.preheader.us: ; preds = %middle.block, %for.cond1.preheader.us.preheader
%i.024.us = phi i32 [ %inc9.us, %middle.block ], [ 0, %for.cond1.preheader.us.preheader ]
%arrayidx.us = getelementptr inbounds i32*, i32** %A, i32 %i.024.us
%tmp3 = load i32*, i32** %arrayidx.us, align 4
%tt3 = load i32*, i32** %arrayidx.us, align 4
%arrayidx7.us = getelementptr inbounds i32, i32* %C, i32 %i.024.us
%arrayidx7.promoted.us = load i32, i32* %arrayidx7.us, align 4
%tmp4 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx7.promoted.us, i32 0
call void @llvm.set.loop.iterations.i32(i32 %tmp2)
%tt4 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx7.promoted.us, i32 0
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tt2)
br label %vector.body
vector.body: ; preds = %vector.body, %for.cond1.preheader.us
%index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ %tmp4, %for.cond1.preheader.us ], [ %tmp12, %vector.body ]
%tmp5 = phi i32 [ %tmp2, %for.cond1.preheader.us ], [ %tmp13, %vector.body ]
%vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt12, %vector.body ]
%tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ %tt13, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
%tmp6 = getelementptr inbounds i32, i32* %tmp3, i32 %index
%tt6 = getelementptr inbounds i32, i32* %tt3, i32 %index
; %tmp7 = icmp ule <4 x i32> %induction, %broadcast.splat28
%tmp7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat28
%tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
%tmp8 = bitcast i32* %tmp6 to <4 x i32>*
%wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp8, i32 4, <4 x i1> %tmp7, <4 x i32> undef)
%tmp9 = getelementptr inbounds i32, i32* %B, i32 %index
%tmp10 = bitcast i32* %tmp9 to <4 x i32>*
%wide.masked.load29 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp10, i32 4, <4 x i1> %tmp7, <4 x i32> undef)
%tmp11 = mul nsw <4 x i32> %wide.masked.load29, %wide.masked.load
%tmp12 = add nsw <4 x i32> %vec.phi, %tmp11
%tt8 = bitcast i32* %tt6 to <4 x i32>*
%wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tt8, i32 4, <4 x i1> %tt7, <4 x i32> undef)
%tt9 = getelementptr inbounds i32, i32* %B, i32 %index
%tt10 = bitcast i32* %tt9 to <4 x i32>*
%wide.masked.load29 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tt10, i32 4, <4 x i1> %tt7, <4 x i32> undef)
%tt11 = mul nsw <4 x i32> %wide.masked.load29, %wide.masked.load
%tt12 = add nsw <4 x i32> %vec.phi, %tt11
%index.next = add i32 %index, 4
%tmp13 = call i32 @llvm.loop.decrement.reg.i32(i32 %tmp5, i32 1)
%tmp14 = icmp ne i32 %tmp13, 0
br i1 %tmp14, label %vector.body, label %middle.block
%tt13 = call i32 @llvm.loop.decrement.reg.i32(i32 %tt5, i32 1)
%tt14 = icmp ne i32 %tt13, 0
br i1 %tt14, label %vector.body, label %middle.block
middle.block: ; preds = %vector.body
%tmp15 = select <4 x i1> %tmp7, <4 x i32> %tmp12, <4 x i32> %vec.phi
%tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp15)
store i32 %tmp16, i32* %arrayidx7.us, align 4
%tt15 = select <4 x i1> %tt7, <4 x i32> %tt12, <4 x i32> %vec.phi
%tt16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tt15)
store i32 %tt16, i32* %arrayidx7.us, align 4
%inc9.us = add nuw i32 %i.024.us, 1
%exitcond26 = icmp eq i32 %inc9.us, %N
br i1 %exitcond26, label %for.cond.cleanup, label %for.cond1.preheader.us
@ -250,7 +250,7 @@ declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #2
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #2

View File

@ -13,11 +13,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
@ -49,7 +49,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
@ -152,7 +152,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14, $noreg
bb.2.vector.body:

View File

@ -14,7 +14,7 @@
br i1 %cmp11, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
%6 = shl i32 %4, 3
%7 = sub i32 %N, %6
br label %vector.body
@ -23,7 +23,7 @@
%lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %13, %vector.body ]
%8 = phi i32 [ %5, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ]
%lsr.iv2022 = bitcast i8* %lsr.iv20 to <16 x i8>*
%lsr.iv19 = bitcast i8* %lsr.iv to <16 x i8>*
@ -54,7 +54,7 @@
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
@ -180,7 +180,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 35, 14, $noreg, $noreg
renamable $r3 = t2LSRri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -14,14 +14,14 @@
br i1 %cmp10, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv19 = phi i8* [ %scevgep20, %vector.body ], [ %res, %vector.ph ]
%lsr.iv16 = phi i8* [ %scevgep17, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1921 = bitcast i8* %lsr.iv19 to <16 x i8>*
%lsr.iv1618 = bitcast i8* %lsr.iv16 to <16 x i8>*
@ -45,7 +45,7 @@
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
@ -155,7 +155,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -14,11 +14,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
@ -39,7 +39,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
@ -123,7 +123,7 @@ body: |
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.2.vector.body:

View File

@ -14,14 +14,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i8* %lsr.iv to <4 x i8>*
%lsr.iv1416 = bitcast i8* %lsr.iv14 to <4 x i8>*
@ -61,14 +61,14 @@
br i1 %cmp10.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi i8* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv14 = bitcast i8* %lsr.iv to <4 x i8>*
%lsr.iv1517 = bitcast i8* %lsr.iv15 to <4 x i8>*
@ -108,14 +108,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i16* %lsr.iv to <4 x i16>*
%lsr.iv1416 = bitcast i16* %lsr.iv14 to <4 x i16>*
@ -155,14 +155,14 @@
br i1 %cmp10.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi i16* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
%lsr.iv1517 = bitcast i16* %lsr.iv15 to <4 x i16>*
@ -203,14 +203,14 @@
br i1 %cmp8.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv13 = phi i32* [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv12 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1315 = bitcast i32* %lsr.iv13 to <4 x i32>*
@ -249,14 +249,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
@ -286,7 +286,7 @@
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -372,7 +372,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -478,7 +478,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -585,7 +585,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -691,7 +691,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -797,7 +797,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -903,7 +903,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -69,26 +69,26 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vadd.i16 q1, q0, q1
; CHECK-NEXT: vadd.i16 q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: vadd.i16 q1, q1, q2
; CHECK-NEXT: vadd.i16 q0, q0, q2
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
@ -142,25 +142,25 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocaptur
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r3, r2, #15
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #15
; CHECK-NEXT: sub.w r12, r3, #16
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #4
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u8 q1, [r1], #16
; CHECK-NEXT: vldrbt.u8 q0, [r1], #16
; CHECK-NEXT: vldrbt.u8 q2, [r0], #16
; CHECK-NEXT: subs r2, #16
; CHECK-NEXT: vsub.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q1, q1, q0
; CHECK-NEXT: vsub.i8 q0, q2, q0
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
@ -212,25 +212,25 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vsub.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vsub.i16 q0, q2, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
@ -284,25 +284,25 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocaptur
; CHECK-NEXT: .LBB4_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r3, r2, #15
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #15
; CHECK-NEXT: sub.w r12, r3, #16
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #4
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u8 q1, [r0], #16
; CHECK-NEXT: vldrbt.u8 q0, [r0], #16
; CHECK-NEXT: vldrbt.u8 q2, [r1], #16
; CHECK-NEXT: subs r2, #16
; CHECK-NEXT: vmul.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q1, q1, q0
; CHECK-NEXT: vmul.i8 q0, q2, q0
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: le lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
@ -354,25 +354,25 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB5_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vmul.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vmul.i16 q0, q2, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB5_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
@ -423,36 +423,36 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: beq .LBB6_8
; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: subs r6, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: add.w r3, r3, r6, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u32 q1, [r4], #4
; CHECK-NEXT: vldrbt.u32 q0, [r4], #4
; CHECK-NEXT: vldrbt.u32 q2, [r5], #4
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmul.i32 q1, q2, q1
; CHECK-NEXT: vadd.i32 q1, q1, q0
; CHECK-NEXT: vmul.i32 q0, q2, q0
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB6_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r12, q0
; CHECK-NEXT: cbz r2, .LBB6_7
; CHECK-NEXT: @ %bb.4: @ %vector.ph47
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: add.w r3, r3, r6, lsr #2
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: vdup.32 q0, r6
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: .LBB6_5: @ %vector.body46
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@ -550,32 +550,32 @@ define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocaptur
; CHECK-NEXT: cbz r2, .LBB7_4
; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmov q3, q1
; CHECK-NEXT: add.w lr, r4, r3, lsr #3
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: vmov q3, q0
; CHECK-NEXT: add.w r3, r4, r3, lsr #3
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: .LBB7_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r3], #8
; CHECK-NEXT: vldrbt.u16 q0, [r3], #8
; CHECK-NEXT: vldrbt.u16 q4, [r4], #8
; CHECK-NEXT: vmov q2, q3
; CHECK-NEXT: vsub.i16 q3, q4, q1
; CHECK-NEXT: vmul.i16 q1, q4, q1
; CHECK-NEXT: vsub.i16 q3, q4, q0
; CHECK-NEXT: vmul.i16 q0, q4, q0
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vadd.i16 q3, q3, q2
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB7_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q2, q3, q2
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r4, q2
; CHECK-NEXT: vaddv.u16 r2, q0
; CHECK-NEXT: b .LBB7_5
@ -643,40 +643,40 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: movw r12, #47184
; CHECK-NEXT: movw r3, #23593
; CHECK-NEXT: ldrd r2, lr, [r1, #4]
; CHECK-NEXT: movw r1, #23593
; CHECK-NEXT: movt r12, #1310
; CHECK-NEXT: movt r3, #49807
; CHECK-NEXT: mla r3, lr, r3, r12
; CHECK-NEXT: movw r1, #55051
; CHECK-NEXT: movt r1, #49807
; CHECK-NEXT: mla r1, lr, r1, r12
; CHECK-NEXT: movw r3, #55051
; CHECK-NEXT: movw r4, #23593
; CHECK-NEXT: movt r1, #163
; CHECK-NEXT: movt r3, #163
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: movt r4, #655
; CHECK-NEXT: ror.w r12, r3, #4
; CHECK-NEXT: cmp r12, r1
; CHECK-NEXT: cset r1, lo
; CHECK-NEXT: ror.w r3, r3, #2
; CHECK-NEXT: ror.w r12, r1, #4
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: cset r3, lo
; CHECK-NEXT: ror.w r1, r1, #2
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: cmp r3, r4
; CHECK-NEXT: csel r3, r1, r12, lo
; CHECK-NEXT: cmp r1, r4
; CHECK-NEXT: csel r1, r3, r12, lo
; CHECK-NEXT: lsls.w r4, lr, #30
; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r3, r3, r1, ne
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r4, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: movs r4, #52
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: movw r3, :lower16:days
; CHECK-NEXT: movt r3, :upper16:days
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mla r1, r1, r4, r3
; CHECK-NEXT: adds r1, r2, #3
; CHECK-NEXT: bic r1, r1, #3
; CHECK-NEXT: subs r1, #4
; CHECK-NEXT: add.w r4, r12, r1, lsr #2
; CHECK-NEXT: movw r12, :lower16:days
; CHECK-NEXT: movt r12, :upper16:days
; CHECK-NEXT: movs r1, #52
; CHECK-NEXT: mla r1, r3, r1, r12
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View File

@ -105,8 +105,8 @@ define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32*
; CHECK-NEXT: vmov.i32 q2, #0x1
; CHECK-NEXT: add.w lr, r5, #3
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: add.w lr, r5, lr, lsr #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r5, r5, lr, lsr #2
; CHECK-NEXT: dls lr, r5
; CHECK-NEXT: .LBB1_1: @ %bb6
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r12

View File

@ -31,13 +31,13 @@
%ind.end17 = getelementptr float, float* %pDst, i32 %n.vec
%scevgep9 = getelementptr float, float* %pDst, i32 -4
%scevgep14 = getelementptr float, float* %pSrc, i32 -4
call void @llvm.set.loop.iterations.i32(i32 %4)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi float* [ %scevgep16, %vector.body ], [ %scevgep14, %vector.ph ]
%lsr.iv10 = phi float* [ %scevgep11, %vector.body ], [ %scevgep9, %vector.ph ]
%5 = phi i32 [ %4, %vector.ph ], [ %7, %vector.body ]
%5 = phi i32 [ %start1, %vector.ph ], [ %7, %vector.body ]
%lsr.iv1517 = bitcast float* %lsr.iv15 to <4 x float>*
%lsr.iv1012 = bitcast float* %lsr.iv10 to <4 x float>*
%scevgep18 = getelementptr <4 x float>, <4 x float>* %lsr.iv1517, i32 1
@ -61,13 +61,13 @@
%pDst.addr.06.ph = phi float* [ %pDst, %vector.memcheck ], [ %pDst, %while.body.preheader ], [ %ind.end17, %middle.block ]
%scevgep1 = getelementptr float, float* %pSrc.addr.07.ph, i32 -1
%scevgep4 = getelementptr float, float* %pDst.addr.06.ph, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %blkCnt.08.ph)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %blkCnt.08.ph)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader19
%lsr.iv5 = phi float* [ %scevgep6, %while.body ], [ %scevgep4, %while.body.preheader19 ]
%lsr.iv = phi float* [ %scevgep2, %while.body ], [ %scevgep1, %while.body.preheader19 ]
%9 = phi i32 [ %blkCnt.08.ph, %while.body.preheader19 ], [ %12, %while.body ]
%9 = phi i32 [ %start2, %while.body.preheader19 ], [ %12, %while.body ]
%scevgep3 = getelementptr float, float* %lsr.iv, i32 1
%scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
%10 = load float, float* %scevgep3, align 4
@ -84,7 +84,7 @@
}
declare float @llvm.fabs.f32(float)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
...
@ -262,7 +262,7 @@ body: |
renamable $r7, dead $cpsr = tSUBrr renamable $r2, renamable $r4, 14, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r12 = t2ADDrs renamable $r0, renamable $r4, 18, 14, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 16, 14, $noreg
$r5 = tMOVr killed $r3, 14, $noreg
renamable $r3 = t2ADDrs renamable $r1, renamable $r4, 18, 14, $noreg, $noreg
@ -305,7 +305,7 @@ body: |
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r3, 4, 14, $noreg
renamable $r1 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8.while.body:
successors: %bb.8(0x7c000000), %bb.9(0x04000000)

View File

@ -14,12 +14,12 @@
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
%add = add nsw i32 %call, %res.07
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
@ -33,7 +33,7 @@
declare i32 @bar(...) local_unnamed_addr #0
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
attributes #0 = { "target-features"="+mve.fp" }
@ -109,7 +109,7 @@ body: |
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -14,11 +14,11 @@
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 0
%2 = icmp ne i32 %1, 0
@ -29,7 +29,7 @@
ret i32 %res.0.lcssa
}
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
attributes #0 = { "target-features"="+mve.fp" }
@ -96,7 +96,7 @@ body: |
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -14,11 +14,11 @@
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 2
%2 = icmp ne i32 %1, 0
@ -30,7 +30,7 @@
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -102,7 +102,7 @@ body: |
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -30,7 +30,7 @@
%gap.057 = sdiv i32 %gap.057.in, 2
%cmp252 = icmp slt i32 %gap.057, %n
%tmp = sub i32 %n, %gap.057
call void @llvm.set.loop.iterations.i32(i32 %tmp)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp)
br i1 %cmp252, label %for.cond4.preheader.preheader, label %for.cond.loopexit
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
@ -44,7 +44,7 @@
%lsr.iv2 = phi i32* [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ]
%lsr.iv = phi i32* [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ]
%i.053 = phi i32 [ %inc, %for.inc16 ], [ %gap.057, %for.cond4.preheader.preheader ]
%tmp8 = phi i32 [ %tmp, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ]
%tmp8 = phi i32 [ %start, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ]
%j.048 = sub nsw i32 %i.053, %gap.057
%cmp549 = icmp sgt i32 %j.048, -1
br i1 %cmp549, label %land.rhs.preheader, label %for.inc16
@ -93,7 +93,7 @@
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
@ -208,7 +208,7 @@ body: |
renamable $lr = t2SUBrs renamable $r1, renamable $r2, 9, 14, $noreg, $noreg
renamable $r9 = t2ASRri renamable $r2, 1, 14, $noreg, $noreg
t2CMPrs renamable $r1, killed renamable $r2, 9, 14, $noreg, implicit-def $cpsr
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
tBcc %bb.2, 13, killed $cpsr
bb.4.for.cond4.preheader.preheader:

View File

@ -11,7 +11,7 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %preheader
preheader:
@ -20,7 +20,7 @@
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%0 = phi i32 [ %start, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
@ -35,7 +35,7 @@
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
@ -120,7 +120,7 @@ body: |
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
$lr = t2DoLoopStart $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg

View File

@ -8,11 +8,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -43,11 +43,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <8 x i16>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <8 x i16>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -72,7 +72,7 @@
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
@ -160,7 +160,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$lr = t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.2.loop.body:
@ -261,7 +261,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.loop.body:

View File

@ -15,29 +15,29 @@ define arm_aapcs_vfpcc void @test(i16* noalias nocapture readonly %off, i16* noa
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: .LBB0_3: @ %for.body4.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh.w r6, [r0, r5, lsl #1]
; CHECK-NEXT: ldrh.w r7, [r1, r5, lsl #1]
; CHECK-NEXT: add r6, r7
; CHECK-NEXT: strh.w r6, [r4, r5, lsl #1]
; CHECK-NEXT: adds r5, #1
; CHECK-NEXT: ldrh.w r5, [r0, r6, lsl #1]
; CHECK-NEXT: ldrh.w r7, [r1, r6, lsl #1]
; CHECK-NEXT: add r5, r7
; CHECK-NEXT: strh.w r5, [r4, r6, lsl #1]
; CHECK-NEXT: adds r6, #1
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %for.body15.us.preheader
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: .LBB0_5: @ %for.body15.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh.w r7, [r0, r5, lsl #1]
; CHECK-NEXT: ldrh.w r6, [r1, r5, lsl #1]
; CHECK-NEXT: add r6, r7
; CHECK-NEXT: strh.w r6, [r2, r5, lsl #1]
; CHECK-NEXT: adds r5, #1
; CHECK-NEXT: ldrh.w r7, [r0, r6, lsl #1]
; CHECK-NEXT: ldrh.w r5, [r1, r6, lsl #1]
; CHECK-NEXT: add r5, r7
; CHECK-NEXT: strh.w r5, [r2, r6, lsl #1]
; CHECK-NEXT: adds r6, #1
; CHECK-NEXT: le lr, .LBB0_5
; CHECK-NEXT: @ %bb.6: @ %for.cond.cleanup14.us
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1

View File

@ -16,7 +16,7 @@
%scevgep = getelementptr i32, i32* %a, i32 -1
%scevgep4 = getelementptr i32, i32* %c, i32 -1
%scevgep8 = getelementptr i32, i32* %b, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
@ -26,7 +26,7 @@
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
%0 = phi i32 [ %start, %for.body.preheader ], [ %3, %for.body ]
%size = call i32 @llvm.arm.space(i32 4070, i32 undef)
%scevgep3 = getelementptr i32, i32* %lsr.iv9, i32 1
%1 = load i32, i32* %scevgep3, align 4
@ -47,7 +47,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -155,7 +155,7 @@ body: |
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
$lr = t2DoLoopStart killed $r3
bb.2.for.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -20,7 +20,7 @@
vector.ph: ; preds = %entry
%7 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0, !dbg !32
call void @llvm.set.loop.iterations.i32(i32 %6), !dbg !32
%start = call i32 @llvm.start.loop.iterations.i32(i32 %6), !dbg !32
%8 = shl i32 %5, 2, !dbg !32
%9 = sub i32 %N, %8, !dbg !32
br label %vector.body, !dbg !32
@ -28,7 +28,7 @@
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33
%vec.phi = phi <4 x i32> [ %7, %vector.ph ], [ %15, %vector.body ]
%10 = phi i32 [ %6, %vector.ph ], [ %16, %vector.body ]
%10 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ]
%11 = phi i32 [ %N, %vector.ph ], [ %13, %vector.body ]
%lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
%12 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %11), !dbg !34
@ -59,7 +59,7 @@
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -261,7 +261,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, renamable $r3, 19, 14, $noreg, $noreg, debug-location !32
renamable $r3, dead $cpsr = tLSRri killed renamable $r3, 2, 14, $noreg, debug-location !32
renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 18, 14, $noreg, $noreg, debug-location !32
t2DoLoopStart renamable $lr, debug-location !32
$lr = t2DoLoopStart renamable $lr, debug-location !32
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

View File

@ -17,7 +17,7 @@
br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body
for.cond.cleanup: ; preds = %for.inc, %entry
@ -30,7 +30,7 @@
%lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ]
%spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
%found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
%0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ]
%0 = phi i32 [ %start, %for.body.preheader ], [ %3, %for.inc ]
%1 = load i8, i8* %lsr.iv1, align 1
%2 = zext i8 %1 to i32
switch i32 %2, label %for.inc [
@ -58,7 +58,7 @@
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
@ -130,7 +130,7 @@ body: |
liveins: $r0, $r1
$lr = tMOVr $r1, 14, $noreg
t2DoLoopStart killed $r1
$lr = t2DoLoopStart killed $r1
renamable $r1, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14, $noreg

View File

@ -25,12 +25,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <16 x i32> %broadcast.splatinsert10, <16 x i32> undef, <16 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <16 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
%induction = or <16 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@ -82,12 +82,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -138,12 +138,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -193,12 +193,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -252,12 +252,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -311,12 +311,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -374,7 +374,7 @@ vector.ph:
%scevgep = getelementptr i32, i32* %A, i32 8
%scevgep30 = getelementptr i32, i32* %C, i32 8
%scevgep37 = getelementptr i32, i32* %B, i32 8
call void @llvm.set.loop.iterations.i32(i32 %v5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %v5)
br label %vector.body
vector.body:
@ -382,7 +382,7 @@ vector.body:
%lsr.iv31 = phi i32* [ %scevgep32, %vector.body ], [ %scevgep30, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep25, %vector.body ], [ %scevgep, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %v14, %vector.body ]
%v6 = phi i32 [ %v5, %vector.ph ], [ %v15, %vector.body ]
%v6 = phi i32 [ %start, %vector.ph ], [ %v15, %vector.body ]
%lsr.iv3840 = bitcast i32* %lsr.iv38 to <4 x i32>*
%lsr.iv3133 = bitcast i32* %lsr.iv31 to <4 x i32>*
%lsr.iv26 = bitcast i32* %lsr.iv to <4 x i32>*
@ -447,7 +447,7 @@ entry:
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
vector.ph:
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -455,7 +455,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
@ -496,7 +496,7 @@ entry:
vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -504,7 +504,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
@ -547,7 +547,7 @@ entry:
vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -558,7 +558,7 @@ vector.body: ; preds = %vector.body, %vecto
; AddRec base is not 0:
%index = phi i32 [ 1, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
@ -589,7 +589,7 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32 immarg, <2 x i1>)
declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32 immarg, <2 x i1>, <2 x i64>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)

View File

@ -4,14 +4,14 @@
define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 8001)
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 8001, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 32003, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* [[LSR_IV11]] to <4 x i32>*
@ -36,7 +36,7 @@ define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture rea
; CHECK-NEXT: ret void
;
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -44,7 +44,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -77,13 +77,13 @@ for.cond.cleanup:
define dso_local void @foo2(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
; CHECK-LABEL: @foo2(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 2000)
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 2000)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 2000, [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* [[LSR_IV11]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV10:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>*
@ -101,14 +101,14 @@ define dso_local void @foo2(i32* noalias nocapture %A, i32* noalias nocapture re
; CHECK-NEXT: ret void
;
entry:
call void @llvm.set.loop.iterations.i32(i32 2000)
%start = call i32 @llvm.start.loop.iterations.i32(i32 2000)
br label %vector.body
vector.body:
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %A, %entry ]
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%0 = phi i32 [ 2000, %entry ], [ %2, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %2, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -131,14 +131,14 @@ for.cond.cleanup:
define dso_local void @foo3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
; CHECK-LABEL: @foo3(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 8001)
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 8001, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* [[LSR_IV11]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV10:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>*
@ -161,7 +161,7 @@ define dso_local void @foo3(i32* noalias nocapture %A, i32* noalias nocapture re
; CHECK-NEXT: ret void
;
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -169,7 +169,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -199,14 +199,14 @@ for.cond.cleanup:
define dso_local void @foo5(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
; CHECK-LABEL: @foo5(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 8001)
; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 8001, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* [[LSR_IV11]] to <4 x i32>*
; CHECK-NEXT: [[LSR_IV10:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>*
@ -229,7 +229,7 @@ define dso_local void @foo5(i32* noalias nocapture %A, i32* noalias nocapture re
; CHECK-NEXT: ret void
;
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -237,7 +237,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -273,7 +273,7 @@ for.cond.cleanup:
;
define dso_local void @inconsistent_tripcounts(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -281,7 +281,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -316,7 +316,7 @@ for.cond.cleanup:
;
define dso_local void @overflow_in_sub(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
entry:
call void @llvm.set.loop.iterations.i32(i32 1073741824)
%start = call i32 @llvm.start.loop.iterations.i32(i32 1073741824)
br label %vector.body
vector.body:
@ -324,7 +324,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -359,7 +359,7 @@ for.cond.cleanup:
;
define dso_local void @IV_not_an_induction(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -367,7 +367,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -402,7 +402,7 @@ for.cond.cleanup:
;
define dso_local void @IV_wrong_step(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -410,7 +410,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -448,7 +448,7 @@ for.cond.cleanup:
;
define dso_local void @IV_step_not_constant(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
entry:
call void @llvm.set.loop.iterations.i32(i32 8001)
%start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
br label %vector.body
vector.body:
@ -456,7 +456,7 @@ vector.body:
%lsr.iv11 = phi i32* [ %scevgep12, %vector.body ], [ %C, %entry ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %entry ]
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = phi i32 [ 8001, %entry ], [ %3, %vector.body ]
%0 = phi i32 [ %start, %entry ], [ %3, %vector.body ]
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>*
%lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>*
@ -502,7 +502,7 @@ vector.ph: ; preds = %vector.ph.preheader
%lsr.iv31 = phi i32* [ %C, %vector.ph.preheader ], [ %scevgep32, %for.cond.cleanup3 ]
%lsr.iv = phi i32* [ %A, %vector.ph.preheader ], [ %scevgep, %for.cond.cleanup3 ]
%j.025 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %vector.ph.preheader ]
call void @llvm.set.loop.iterations.i32(i32 1025)
%start = call i32 @llvm.start.loop.iterations.i32(i32 1025)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -510,7 +510,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv33 = phi i32* [ %scevgep34, %vector.body ], [ %lsr.iv31, %vector.ph ]
%lsr.iv28 = phi i32* [ %scevgep29, %vector.body ], [ %lsr.iv, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%0 = phi i32 [ 1025, %vector.ph ], [ %2, %vector.body ]
%0 = phi i32 [ %start, %vector.ph ], [ %2, %vector.body ]
%lsr.iv3840 = bitcast i32* %lsr.iv38 to <4 x i32>*
%lsr.iv3335 = bitcast i32* %lsr.iv33 to <4 x i32>*
%lsr.iv2830 = bitcast i32* %lsr.iv28 to <4 x i32>*
@ -546,5 +546,5 @@ for.cond.cleanup3: ; preds = %vector.body
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare i32 @llvm.loop.decrement.reg.i32(i32 , i32 )
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)

View File

@ -83,7 +83,7 @@ entry:
vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -91,7 +91,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
@ -118,6 +118,6 @@ for.cond.cleanup: ; preds = %vector.body, %entry
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)

View File

@ -246,11 +246,11 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: adr r3, .LCPI5_0
; CHECK-NEXT: vldrw.u32 q0, [r3]
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vadd.i32 q2, q0, r12

View File

@ -18,12 +18,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -50,5 +50,5 @@ for.cond.cleanup: ; preds = %vector.body, %entry
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

View File

@ -20,12 +20,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 1
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -65,12 +65,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -110,12 +110,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -155,12 +155,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -200,12 +200,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.2 = add i32 %N, -2
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.2, i32 1
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -245,12 +245,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 1
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -289,12 +289,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%incorrect = add i32 %index, 1
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %incorrect, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@ -335,12 +335,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -380,12 +380,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 1, i32 2, i32 3, i32 4>
@ -425,12 +425,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, %offsets
@ -470,12 +470,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -501,6 +501,6 @@ for.cond.cleanup: ; preds = %vector.body, %entry
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3

View File

@ -23,13 +23,13 @@ vector.ph:
%0 = add i32 %n.vec, -8
%1 = lshr i32 %0, 3
%2 = add i32 %1, 1
call void @llvm.set.loop.iterations.i32(i32 %2)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %2)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp8, %vector.body ]
%3 = phi i32 [ %2, %vector.ph], [ %4, %vector.body ]
%3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -94,13 +94,13 @@ vector.ph:
%0 = add i32 %n.vec, -8
%1 = lshr i32 %0, 3
%2 = add nuw nsw i32 %1, 1
call void @llvm.set.loop.iterations.i32(i32 %2)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %2)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp6, %vector.body ]
%3 = phi i32 [ %2, %vector.ph], [ %4, %vector.body ]
%3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -158,13 +158,13 @@ entry:
%0 = add i32 %n.vec, -8
%1 = lshr i32 %0, 3
%2 = add nuw nsw i32 %1, 1
call void @llvm.set.loop.iterations.i32(i32 %2)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %2)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %entry], [ %index.next, %vector.body ]
%vec.phi = phi <8 x i16> [ zeroinitializer, %entry], [ %tmp6, %vector.body ]
%3 = phi i32 [ %2, %entry ], [ %4, %vector.body ]
%3 = phi i32 [ %start, %entry ], [ %4, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -228,7 +228,7 @@ for.body:
vector.ph: ; preds = %for.body
%trip.count.minus.1 = add i32 %8, -1
call void @llvm.set.loop.iterations.i32(i32 %7)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %7)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -236,7 +236,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %Input, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %16, %vector.body ]
%9 = phi i32 [ %7, %vector.ph ], [ %17, %vector.body ]
%9 = phi i32 [ %start, %vector.ph ], [ %17, %vector.body ]
%lsr.iv4850 = bitcast i16* %lsr.iv48 to <4 x i16>*
%lsr.iv45 = bitcast i16* %lsr.iv to <4 x i16>*
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %8)
@ -278,7 +278,7 @@ for.end17: ; preds = %for.end, %entry
}
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)

View File

@ -17,12 +17,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -76,13 +76,13 @@ vector.ph: ; preds = %entry
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
%broadcast.splatinsert10.store = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11.store = shufflevector <4 x i32> %broadcast.splatinsert10.store, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%store.idx = phi i32 [ 0, %vector.ph ], [ %store.idx.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -139,12 +139,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
@ -178,7 +178,7 @@ declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32 immarg,
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)

View File

@ -7,14 +7,14 @@ define dso_local arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: adr r3, .LCPI0_0
; CHECK-NEXT: mov.w lr, #3
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: vldrw.u32 q2, [r3]
; CHECK-NEXT: vmov.i32 q0, #0x80000000
; CHECK-NEXT: vmvn.i32 q1, #0x80000000
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov.i32 q3, #0xa
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vadd.i32 q4, q2, r2

View File

@ -14,11 +14,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%lsr.iv.2 = phi i16* [ %scevgep.2, %vector.body ], [ %c, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -40,7 +40,7 @@
}
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
@ -132,7 +132,7 @@ body: |
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
$r12 = t2MOVi16 32768, 14 /* CC::al */, $noreg
$r12 = t2MOVTi16 killed $r12, 65535, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r5 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.2.vector.body:

View File

@ -57,9 +57,9 @@ define i32 @bad(i32* readonly %x, i32* nocapture readonly %y, i32 %n) {
; CHECK-NEXT: subs r3, r2, r3
; CHECK-NEXT: add.w r12, r3, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB1_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2

View File

@ -37,18 +37,18 @@
br i1 %7, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
for.body.preheader.new: ; preds = %for.body.preheader
call void @llvm.set.loop.iterations.i32(i32 %11)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %11)
br label %for.body
vector.ph: ; preds = %vector.memcheck
call void @llvm.set.loop.iterations.i32(i32 %5)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv50 = phi i8* [ %scevgep51, %vector.body ], [ %res, %vector.ph ]
%lsr.iv47 = phi i8* [ %scevgep48, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep45, %vector.body ], [ %a, %vector.ph ]
%12 = phi i32 [ %5, %vector.ph ], [ %17, %vector.body ]
%12 = phi i32 [ %start2, %vector.ph ], [ %17, %vector.body ]
%13 = phi i32 [ %N, %vector.ph ], [ %15, %vector.body ]
%lsr.iv5052 = bitcast i8* %lsr.iv50 to <16 x i8>*
%lsr.iv4749 = bitcast i8* %lsr.iv47 to <16 x i8>*
@ -88,7 +88,7 @@
for.body: ; preds = %for.body, %for.body.preheader.new
%i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
%21 = phi i32 [ %11, %for.body.preheader.new ], [ %30, %for.body ]
%21 = phi i32 [ %start1, %for.body.preheader.new ], [ %30, %for.body ]
%scevgep23 = getelementptr i8, i8* %a, i32 %i.011
%scevgep2453 = bitcast i8* %scevgep23 to i8*
%22 = load i8, i8* %scevgep2453, align 1
@ -159,7 +159,7 @@
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
@ -429,7 +429,7 @@ body: |
renamable $r6 = t2BICri killed renamable $r6, 15, 14, $noreg, $noreg
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 16, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r6, 35, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body:
successors: %bb.5(0x7c000000), %bb.11(0x04000000)
@ -455,7 +455,7 @@ body: |
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r3, 19, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.7.for.body:
successors: %bb.7(0x7c000000), %bb.8(0x04000000)

View File

@ -7,14 +7,14 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
%limit = lshr i32 %n, 1
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ]
%tmp = phi i32 [ %start, %entry ], [ %tmp2, %while.body ]
%scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1
%tmp1 = load i32, i32* %scevgep7, align 4
@ -33,7 +33,7 @@
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
@ -130,7 +130,7 @@ body: |
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
renamable $r2 = t2LSRri renamable $lr, 1, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.1.while.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)

View File

@ -7,14 +7,14 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
%limit = lshr i32 %n, 1
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ]
%tmp = phi i32 [ %start, %entry ], [ %tmp2, %while.body ]
%scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1
%tmp1 = load i32, i32* %scevgep7, align 4
@ -33,7 +33,7 @@
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
@ -129,7 +129,7 @@ body: |
frame-setup CFI_INSTRUCTION offset $r7, -8
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
t2DoLoopStart renamable $r0
$lr = t2DoLoopStart renamable $r0
renamable $r2 = t2LSRri renamable $r0, 1, 14, $noreg, $noreg
$lr = tMOVr $r0, 14, $noreg

View File

@ -1,122 +0,0 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
# CHECK-NOT: $lr = t2DLS
# CHECK: $lr = tMOVr $r0, 14
# CHECK-NOT: $lr = t2LEUpdate
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %preheader
preheader:
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %while.body, label %while.end
while.end: ; preds = %while.body
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: do_copy
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
bb.1.preheader:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $lr
$lr = tMOVr $r0, 14, $noreg
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -8,11 +8,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -44,11 +44,11 @@
br i1 %cmp, label %exit, label %loop.ph
loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
@ -75,7 +75,7 @@
ret void
}
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
@ -163,7 +163,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$lr = t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.2.loop.body:
@ -269,7 +269,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$lr = t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.2.loop.body:

View File

@ -9,7 +9,7 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %preheader
preheader:
@ -18,7 +18,7 @@
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%0 = phi i32 [ %start, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
@ -33,7 +33,7 @@
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
@ -89,11 +89,12 @@ body: |
; CHECK-LABEL: name: do_copy
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r2, $r7
; CHECK: liveins: $r0, $r2, $r7
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: $lr = t2DLS killed $r0
; CHECK: renamable $r0 = t2SUBri killed renamable $lr, 4, 14 /* CC::al */, $noreg, def dead $cpsr
; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: bb.1.preheader:
@ -105,9 +106,7 @@ body: |
; CHECK: liveins: $lr, $r0, $r1
; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep6)
; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2)
; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK: tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
; CHECK: tB %bb.3, 14 /* CC::al */, $noreg
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.while.end:
; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $r0
@ -119,7 +118,7 @@ body: |
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
$lr = t2DoLoopStart $r0
renamable $r0 = t2SUBri killed renamable $lr, 4, 14, $noreg, def $cpsr
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg

View File

@ -13,11 +13,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -51,11 +51,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -89,11 +89,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -127,11 +127,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -165,11 +165,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -204,11 +204,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -243,11 +243,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -282,11 +282,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -321,11 +321,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -361,11 +361,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -401,11 +401,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -440,11 +440,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -479,11 +479,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -518,11 +518,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -557,11 +557,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
@ -596,11 +596,11 @@
br i1 %cmp9, label %exit, label %vector.ph
vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ]
@ -635,7 +635,7 @@
br i1 %cmp22, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
@ -643,7 +643,7 @@
%y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %4, %while.body.preheader ], [ %6, %while.body ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
%tmp3 = bitcast i16* %y.addr.025 to <4 x i16>*
%tmp1 = bitcast i16* %x.addr.026 to <4 x i16>*
%tmp = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.023)
@ -678,7 +678,7 @@
br i1 %cmp22, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
@ -686,7 +686,7 @@
%y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %4, %while.body.preheader ], [ %6, %while.body ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
%tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
%tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
@ -720,7 +720,7 @@
br i1 %cmp22, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
@ -728,7 +728,7 @@
%y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %4, %while.body.preheader ], [ %6, %while.body ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
%tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
%tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
@ -763,7 +763,7 @@
br i1 %cmp22, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %4)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
@ -771,7 +771,7 @@
%y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ]
%n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ]
%acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ]
%5 = phi i32 [ %4, %while.body.preheader ], [ %6, %while.body ]
%5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ]
%tmp3 = bitcast i16* %y.addr.025 to <8 x i16>*
%tmp1 = bitcast i16* %x.addr.026 to <8 x i16>*
%tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023)
@ -803,7 +803,7 @@
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
@ -887,7 +887,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -986,7 +986,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -1085,7 +1085,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -1185,7 +1185,7 @@ body: |
renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r3 = tMOVr killed $r2, 14 /* CC::al */, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
@ -1304,7 +1304,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -1417,7 +1417,7 @@ body: |
renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r3 = tMOVr killed $r2, 14 /* CC::al */, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
@ -1537,7 +1537,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -1650,7 +1650,7 @@ body: |
renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r3 = tMOVr killed $r2, 14 /* CC::al */, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
@ -1779,7 +1779,7 @@ body: |
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -1904,7 +1904,7 @@ body: |
renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0)
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r4 = tMOVr killed $r2, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2032,7 +2032,7 @@ body: |
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2157,7 +2157,7 @@ body: |
renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0)
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r4 = tMOVr killed $r2, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2285,7 +2285,7 @@ body: |
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2410,7 +2410,7 @@ body: |
renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0)
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r4 = tMOVr killed $r2, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2538,7 +2538,7 @@ body: |
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2663,7 +2663,7 @@ body: |
renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0)
renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r4 = tMOVr killed $r2, 14 /* CC::al */, $noreg
bb.2.vector.body:
@ -2781,7 +2781,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -2897,7 +2897,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -3026,7 +3026,7 @@ body: |
renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
@ -3150,7 +3150,7 @@ body: |
renamable $r12 = t2ADDri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r2 = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
t2DoLoopStart renamable $r2
$lr = t2DoLoopStart renamable $r2
$r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg

View File

@ -26,7 +26,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; ENABLED-NEXT: ldr r0, [sp, #36]
; ENABLED-NEXT: add.w r12, r2, #3
; ENABLED-NEXT: ldr.w r10, [sp] @ 4-byte Reload
; ENABLED-NEXT: movs r6, #0
; ENABLED-NEXT: mov.w r8, #0
; ENABLED-NEXT: mov r9, r12
; ENABLED-NEXT: uxth r0, r0
; ENABLED-NEXT: rsbs r5, r0, #0
@ -37,32 +37,32 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLED-NEXT: lsrs r0, r0, #16
; ENABLED-NEXT: sub.w r9, r9, #1
; ENABLED-NEXT: strh.w r0, [r1, r6, lsl #1]
; ENABLED-NEXT: adds r6, #1
; ENABLED-NEXT: strh.w r0, [r1, r8, lsl #1]
; ENABLED-NEXT: add.w r8, r8, #1
; ENABLED-NEXT: add.w r10, r10, #2
; ENABLED-NEXT: cmp r6, r3
; ENABLED-NEXT: cmp r8, r3
; ENABLED-NEXT: beq .LBB0_8
; ENABLED-NEXT: .LBB0_4: @ %for.body
; ENABLED-NEXT: @ =>This Loop Header: Depth=1
; ENABLED-NEXT: @ Child Loop BB0_6 Depth 2
; ENABLED-NEXT: cmp r2, r6
; ENABLED-NEXT: cmp r2, r8
; ENABLED-NEXT: ble .LBB0_2
; ENABLED-NEXT: @ %bb.5: @ %vector.ph
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLED-NEXT: bic r0, r9, #3
; ENABLED-NEXT: movs r7, #1
; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: subs r4, r2, r6
; ENABLED-NEXT: sub.w r4, r2, r8
; ENABLED-NEXT: vmov.i32 q1, #0x0
; ENABLED-NEXT: add.w r8, r7, r0, lsr #2
; ENABLED-NEXT: sub.w r0, r12, r6
; ENABLED-NEXT: add.w r6, r7, r0, lsr #2
; ENABLED-NEXT: sub.w r0, r12, r8
; ENABLED-NEXT: bic r0, r0, #3
; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: add.w r0, r7, r0, lsr #2
; ENABLED-NEXT: mov r7, r10
; ENABLED-NEXT: dls lr, r0
; ENABLED-NEXT: ldr r0, [sp] @ 4-byte Reload
; ENABLED: .LBB0_6: @ %vector.body
; ENABLED-NEXT: .LBB0_6: @ %vector.body
; ENABLED-NEXT: @ Parent Loop BB0_4 Depth=1
; ENABLED-NEXT: @ => This Inner Loop Header: Depth=2
; ENABLED-NEXT: vctp.32 r4
@ -70,9 +70,9 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; ENABLED-NEXT: vpstt
; ENABLED-NEXT: vldrht.s32 q1, [r0], #8
; ENABLED-NEXT: vldrht.s32 q2, [r7], #8
; ENABLED-NEXT: mov lr, r8
; ENABLED-NEXT: mov lr, r6
; ENABLED-NEXT: vmul.i32 q1, q2, q1
; ENABLED-NEXT: sub.w r8, r8, #1
; ENABLED-NEXT: subs r6, #1
; ENABLED-NEXT: vshl.s32 q1, r5
; ENABLED-NEXT: subs r4, #4
; ENABLED-NEXT: vadd.i32 q1, q1, q0
@ -97,7 +97,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; NOREDUCTIONS-NEXT: ldr r0, [sp, #36]
; NOREDUCTIONS-NEXT: add.w r12, r2, #3
; NOREDUCTIONS-NEXT: ldr.w r10, [sp] @ 4-byte Reload
; NOREDUCTIONS-NEXT: movs r6, #0
; NOREDUCTIONS-NEXT: mov.w r8, #0
; NOREDUCTIONS-NEXT: mov r9, r12
; NOREDUCTIONS-NEXT: uxth r0, r0
; NOREDUCTIONS-NEXT: rsbs r5, r0, #0
@ -108,31 +108,31 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
; NOREDUCTIONS-NEXT: lsrs r0, r0, #16
; NOREDUCTIONS-NEXT: sub.w r9, r9, #1
; NOREDUCTIONS-NEXT: strh.w r0, [r1, r6, lsl #1]
; NOREDUCTIONS-NEXT: adds r6, #1
; NOREDUCTIONS-NEXT: strh.w r0, [r1, r8, lsl #1]
; NOREDUCTIONS-NEXT: add.w r8, r8, #1
; NOREDUCTIONS-NEXT: add.w r10, r10, #2
; NOREDUCTIONS-NEXT: cmp r6, r3
; NOREDUCTIONS: beq .LBB0_8
; NOREDUCTIONS-NEXT: cmp r8, r3
; NOREDUCTIONS-NEXT: beq .LBB0_8
; NOREDUCTIONS-NEXT: .LBB0_4: @ %for.body
; NOREDUCTIONS-NEXT: @ =>This Loop Header: Depth=1
; NOREDUCTIONS-NEXT: @ Child Loop BB0_6 Depth 2
; NOREDUCTIONS-NEXT: cmp r2, r6
; NOREDUCTIONS-NEXT: cmp r2, r8
; NOREDUCTIONS-NEXT: ble .LBB0_2
; NOREDUCTIONS-NEXT: @ %bb.5: @ %vector.ph
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
; NOREDUCTIONS-NEXT: bic r0, r9, #3
; NOREDUCTIONS-NEXT: movs r7, #1
; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: subs r4, r2, r6
; NOREDUCTIONS-NEXT: sub.w r4, r2, r8
; NOREDUCTIONS-NEXT: vmov.i32 q1, #0x0
; NOREDUCTIONS-NEXT: add.w r8, r7, r0, lsr #2
; NOREDUCTIONS-NEXT: sub.w r0, r12, r6
; NOREDUCTIONS-NEXT: add.w r6, r7, r0, lsr #2
; NOREDUCTIONS-NEXT: sub.w r0, r12, r8
; NOREDUCTIONS-NEXT: bic r0, r0, #3
; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: add.w r0, r7, r0, lsr #2
; NOREDUCTIONS-NEXT: mov r7, r10
; NOREDUCTIONS-NEXT: dls lr, r0
; NOREDUCTIONS: ldr r0, [sp] @ 4-byte Reload
; NOREDUCTIONS-NEXT: ldr r0, [sp] @ 4-byte Reload
; NOREDUCTIONS-NEXT: .LBB0_6: @ %vector.body
; NOREDUCTIONS-NEXT: @ Parent Loop BB0_4 Depth=1
; NOREDUCTIONS-NEXT: @ => This Inner Loop Header: Depth=2
@ -141,9 +141,9 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; NOREDUCTIONS-NEXT: vpstt
; NOREDUCTIONS-NEXT: vldrht.s32 q1, [r0], #8
; NOREDUCTIONS-NEXT: vldrht.s32 q2, [r7], #8
; NOREDUCTIONS-NEXT: mov lr, r8
; NOREDUCTIONS-NEXT: mov lr, r6
; NOREDUCTIONS-NEXT: vmul.i32 q1, q2, q1
; NOREDUCTIONS-NEXT: sub.w r8, r8, #1
; NOREDUCTIONS-NEXT: subs r6, #1
; NOREDUCTIONS-NEXT: vshl.s32 q1, r5
; NOREDUCTIONS-NEXT: subs r4, #4
; NOREDUCTIONS-NEXT: vadd.i32 q1, q1, q0
@ -184,7 +184,7 @@ for.body: ; preds = %for.end, %for.body.
vector.ph: ; preds = %for.body
%trip.count.minus.1 = add i32 %i8, -1
call void @llvm.set.loop.iterations.i32(i32 %i7)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %i7)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
@ -192,7 +192,7 @@ vector.body: ; preds = %vector.body, %vecto
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %Input, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %i16, %vector.body ]
%i9 = phi i32 [ %i7, %vector.ph ], [ %i17, %vector.body ]
%i9 = phi i32 [ %start, %vector.ph ], [ %i17, %vector.body ]
%lsr.iv4850 = bitcast i16* %lsr.iv48 to <4 x i16>*
%lsr.iv45 = bitcast i16* %lsr.iv to <4 x i16>*
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %i8)
@ -237,4 +237,4 @@ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)

Some files were not shown because too many files have changed in this diff Show More