1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[ARM][LowOverheadLoops] Add LR def safety check

Converting the *LoopStart pseudo instructions into DLS/WLS results in
LR being defined. These instructions were inserted on the assumption
that LR would already contain the loop counter because a mov is
introduced during ISel as the consumers in the loop can only use
LR. That assumption proved wrong!

So perform a safety check, finding an appropriate place to insert the
DLS/WLS instructions or revert if this isn't possible.

Differential Revision: https://reviews.llvm.org/D67539

llvm-svn: 372111
This commit is contained in:
Sam Parker 2019-09-17 12:19:32 +00:00
parent baf8b97a90
commit 3151c652a4
15 changed files with 741 additions and 312 deletions

View File

@ -34,6 +34,7 @@ using namespace llvm;
namespace {
class ARMLowOverheadLoops : public MachineFunctionPass {
MachineFunction *MF = nullptr;
const ARMBaseInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
@ -51,19 +52,6 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
bool ProcessLoop(MachineLoop *ML);
bool RevertNonLoops(MachineFunction &MF);
void RevertWhile(MachineInstr *MI) const;
void RevertLoopDec(MachineInstr *MI) const;
void RevertLoopEnd(MachineInstr *MI) const;
void Expand(MachineLoop *ML, MachineInstr *Start,
MachineInstr *Dec, MachineInstr *End, bool Revert);
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@ -72,6 +60,24 @@ namespace {
StringRef getPassName() const override {
return ARM_LOW_OVERHEAD_LOOPS_NAME;
}
private:
bool ProcessLoop(MachineLoop *ML);
MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
bool RevertNonLoops();
void RevertWhile(MachineInstr *MI) const;
void RevertLoopDec(MachineInstr *MI) const;
void RevertLoopEnd(MachineInstr *MI) const;
void Expand(MachineLoop *ML, MachineInstr *Start,
MachineInstr *InsertPt, MachineInstr *Dec,
MachineInstr *End, bool Revert);
};
}
@ -80,26 +86,28 @@ char ARMLowOverheadLoops::ID = 0;
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
if (!ST.hasLOB())
return false;
LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
MF = &mf;
LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
auto &MLI = getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
TII = static_cast<const ARMBaseInstrInfo*>(
MF.getSubtarget().getInstrInfo());
BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
MRI = &MF->getRegInfo();
TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
BBUtils->computeAllBlockSizes();
BBUtils->adjustBBOffsetsAfter(&MF.front());
BBUtils->adjustBBOffsetsAfter(&MF->front());
bool Changed = false;
for (auto ML : MLI) {
if (!ML->getParentLoop())
Changed |= ProcessLoop(ML);
}
Changed |= RevertNonLoops(MF);
Changed |= RevertNonLoops();
return Changed;
}
@ -108,6 +116,100 @@ static bool IsLoopStart(MachineInstr &MI) {
MI.getOpcode() == ARM::t2WhileLoopStart;
}
/// Walk the instructions from \p Begin (inclusive) up to \p End (exclusive)
/// and return the first one that has a register operand defining \p Reg.
/// The iterator type chosen for \p End decides the direction of the walk, as
/// the starting iterator is constructed from \p Begin using that same type.
/// Returns nullptr when no instruction in the range defines \p Reg.
template <typename IterT>
static MachineInstr *SearchForDef(MachineInstr *Begin, IterT End,
                                  unsigned Reg) {
  for (IterT It(Begin); It != End; ++It) {
    MachineInstr &Candidate = *It;
    for (auto &Operand : Candidate.operands())
      if (Operand.isReg() && Operand.isDef() && Operand.getReg() == Reg)
        return &Candidate;
  }
  return nullptr;
}
/// Walk forward from \p Begin (inclusive) up to \p End (exclusive) and return
/// the first instruction that has a register operand using \p Reg, or nullptr
/// when no instruction in the range uses it.
static MachineInstr *SearchForUse(MachineInstr *Begin,
                                  MachineBasicBlock::iterator End,
                                  unsigned Reg) {
  for (MachineBasicBlock::iterator It(Begin); It != End; ++It) {
    MachineInstr &Candidate = *It;
    for (auto &Operand : Candidate.operands())
      if (Operand.isReg() && Operand.isUse() && Operand.getReg() == Reg)
        return &Candidate;
  }
  return nullptr;
}
// Is it safe to define LR with DLS/WLS, and if so, where?
// LR can be defined if it is already the operand to Start (it then holds the
// same value), or if an existing 'mov lr' makes it equivalent to the operand
// to Start. Returns the instruction to use as the insertion point, or nullptr
// when no safe point was found.
MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
  MachineBasicBlock *MBB = Start->getParent();
  const unsigned CountReg = Start->getOperand(0).getReg();

  // Recognise an always-executed 'mov lr, CountReg'.
  auto CopiesCountToLR = [CountReg](MachineInstr *MI) {
    if (MI->getOpcode() != ARM::tMOVr)
      return false;
    return MI->getOperand(0).getReg() == ARM::LR &&
           MI->getOperand(1).getReg() == CountReg &&
           MI->getOperand(2).getImm() == ARMCC::AL;
  };

  // Find the closest LR defs on either side of Start within this block. They
  // are only of interest when they are the mov described above.
  MachineInstr *DefBefore = SearchForDef(Start, MBB->rend(), ARM::LR);
  MachineInstr *DefAfter = SearchForDef(Start, MBB->end(), ARM::LR);

  // A candidate mov performs the equivalent move to the one the Start
  // instruction will perform only if the count register is not redefined
  // between Start and that mov. Prefer the mov that follows Start. When the
  // returned insertion point differs from Start, the caller can later remove
  // the redundant mov.
  if (DefAfter && CopiesCountToLR(DefAfter) &&
      !SearchForDef(Start, MachineBasicBlock::iterator(DefAfter), CountReg))
    return DefAfter;

  if (DefBefore && CopiesCountToLR(DefBefore) &&
      !SearchForDef(Start, MachineBasicBlock::reverse_iterator(DefBefore),
                    CountReg))
    return DefBefore;

  // Start reads LR directly, so LR already contains the count value.
  if (CountReg == ARM::LR)
    return Start;

  // No suitable mov was found and Start doesn't use LR directly. Defining LR
  // anyway is only possible when its current value is not needed later.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  LivePhysRegs LiveOuts(*TRI);
  LiveOuts.addLiveOuts(*MBB);

  // LR is live out of the block, so it must not be clobbered.
  if (LiveOuts.contains(ARM::LR))
    return nullptr;

  // LR is not live out, but something from Start onwards in this block still
  // reads it, so give up.
  if (SearchForUse(Start, MBB->end(), ARM::LR)) {
    LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
                      << " LR\n");
    return nullptr;
  }

  // Nothing else in the block uses LR after Start, so insert at Start.
  return Start;
}
bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
bool Changed = false;
@ -169,11 +271,13 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
End = &MI;
else if (IsLoopStart(MI))
Start = &MI;
else if (MI.getDesc().isCall())
else if (MI.getDesc().isCall()) {
// TODO: Though the call will require LE to execute again, does this
// mean we should revert? Always executing LE hopefully should be
// faster than performing a sub,cmp,br or even subs,br.
Revert = true;
LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
}
if (!Dec || End)
continue;
@ -237,7 +341,14 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
Revert = true;
}
Expand(ML, Start, Dec, End, Revert);
MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
if (!InsertPt) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
} else
LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
Expand(ML, Start, InsertPt, Dec, End, Revert);
return true;
}
@ -304,33 +415,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
}
void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
MachineInstr *InsertPt,
MachineInstr *Dec, MachineInstr *End,
bool Revert) {
auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
// The trip count should already been held in LR since the instructions
// within the loop can only read and write to LR. So, there should be a
// mov to setup the count. WLS/DLS perform this move, so find the original
// and delete it - inserting WLS/DLS in its place.
MachineBasicBlock *MBB = Start->getParent();
MachineInstr *InsertPt = Start;
for (auto &I : MRI->def_instructions(ARM::LR)) {
if (I.getParent() != MBB)
continue;
// Always execute.
if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
continue;
// Only handle move reg, if the trip count it will need moving into a reg
// before the setup instruction anyway.
if (!I.getDesc().isMoveReg() ||
!I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
continue;
InsertPt = &I;
break;
}
auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
MachineInstr *InsertPt) {
MachineBasicBlock *MBB = InsertPt->getParent();
unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
ARM::t2DLS : ARM::t2WLS;
MachineInstrBuilder MIB =
@ -389,18 +480,18 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
RevertLoopDec(Dec);
RevertLoopEnd(End);
} else {
Start = ExpandLoopStart(ML, Start);
Start = ExpandLoopStart(ML, Start, InsertPt);
RemoveDeadBranch(Start);
End = ExpandLoopEnd(ML, Dec, End);
RemoveDeadBranch(End);
}
}
bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
bool ARMLowOverheadLoops::RevertNonLoops() {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
bool Changed = false;
for (auto &MBB : MF) {
for (auto &MBB : *MF) {
SmallVector<MachineInstr*, 4> Starts;
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;

View File

@ -4,6 +4,9 @@
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
@ -15,10 +18,10 @@
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
%scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep7, align 4
store i32 %1, i32* %scevgep4, align 4
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
@ -44,7 +47,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -84,6 +87,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -96,9 +100,10 @@ body: |
bb.1.while.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
@ -108,4 +113,3 @@ body: |
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -9,7 +9,10 @@
# CHECK: bb.2.for.cond.cleanup:
# CHECK: bb.3.for.header:
--- |
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
call void @llvm.set.loop.iterations.i32(i32 %N)
@ -45,9 +48,11 @@
}
; Function Attrs: nounwind
declare i32 @llvm.arm.space(i32 immarg, i32) #0
declare i32 @llvm.arm.space(i32 immarg, i32) #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
@ -63,7 +68,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -128,6 +133,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.3(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -184,5 +190,3 @@ body: |
tB %bb.1, 14, $noreg
...

View File

@ -0,0 +1,115 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
# CHECK: $lr = t2DLS $r0
# CHECK-NOT: $lr = tMOVr $r0
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %while.body, label %while.end
while.end: ; preds = %while.body
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: do_copy
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
$lr = tMOVr killed $r0, 14, $noreg
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
bb.1.while.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
bb.2.while.end:
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -5,8 +5,6 @@
# CHECK-NOT: t2LEUpdate
--- |
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll'
source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
@ -35,15 +33,9 @@
declare i32 @bar(...) local_unnamed_addr #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #2
attributes #0 = { "target-features"="+mve.fp" }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind }
@ -57,7 +49,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -101,6 +93,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
liveins: $r0, $r4, $r5, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
@ -112,6 +105,7 @@ body: |
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
@ -119,6 +113,7 @@ body: |
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r4
$r5 = tMOVr killed $lr, 14, $noreg
tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
@ -129,6 +124,8 @@ body: |
tB %bb.3, 14, $noreg
bb.3.while.end:
liveins: $r4
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
@ -138,4 +135,3 @@ body: |
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
...

View File

@ -4,7 +4,10 @@
# CHECK-NOT: t2DLS
# CHECK-NOT: t2LEUpdate
--- |
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define i32 @mov_between_dec_end(i32 %n) #0 {
entry:
%cmp6 = icmp eq i32 %n, 0
@ -15,7 +18,6 @@
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 0
@ -27,10 +29,7 @@
ret i32 %res.0.lcssa
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
attributes #0 = { "target-features"="+mve.fp" }
@ -46,7 +45,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -56,11 +55,11 @@ frameInfo:
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
@ -78,51 +77,46 @@ stack:
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
liveins: $r0, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
frame-setup CFI_INSTRUCTION offset $r5, -12
frame-setup CFI_INSTRUCTION offset $r4, -16
tCBZ $r0, %bb.4
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r4
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r4, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $r4 = tMOVr $lr, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
liveins: $lr
$r0 = tMOVr killed $lr, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
bb.4:
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
renamable $lr = t2MOVi 0, 14, $noreg, $noreg
$r0 = tMOVr killed $lr, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -1,136 +0,0 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
# CHECK: while.body:
# CHECK-NOT: t2DLS
# CHECK-NOT: t2LEUpdate
--- |
define i32 @skip_spill(i32 %n) #0 {
entry:
%cmp6 = icmp eq i32 %n, 0
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
%add = add nsw i32 %call, %res.07
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%2 = icmp ne i32 %1, 0
br i1 %2, label %while.body, label %while.end
while.end: ; preds = %while.body, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
ret i32 %res.0.lcssa
}
declare i32 @bar(...) local_unnamed_addr #0
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #2
attributes #0 = { "target-features"="+mve.fp" }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind }
...
---
name: skip_spill
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
frame-setup CFI_INSTRUCTION offset $r5, -12
frame-setup CFI_INSTRUCTION offset $r4, -16
tCBZ $r0, %bb.4
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
$r5 = tMOVr killed $lr, 14, $noreg
tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
$lr = tMOVr killed $r5, 14, $noreg
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
bb.4:
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
...

View File

@ -4,7 +4,10 @@
# CHECK-NOT: t2DLS
# CHECK-NOT: t2LEUpdate
--- |
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define i32 @mov_between_dec_end(i32 %n) #0 {
entry:
%cmp6 = icmp eq i32 %n, 0
@ -15,7 +18,6 @@
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 2
@ -33,6 +35,9 @@
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #2
attributes #0 = { "target-features"="+mve.fp" }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind }
@ -46,7 +51,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -56,11 +61,11 @@ frameInfo:
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
@ -78,51 +83,45 @@ stack:
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
liveins: $r0, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
frame-setup CFI_INSTRUCTION offset $r5, -12
frame-setup CFI_INSTRUCTION offset $r4, -16
tCBZ $r0, %bb.4
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr
$r4 = tMOVr $lr, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $r0 = t2ADDri renamable $lr, 2, 14, $noreg, $noreg
$lr = tMOVr $r4, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
liveins: $r0
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
bb.4:
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -14,6 +14,9 @@
# CHECK: bb.4.while.end:
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) {
entry:
%cmp = icmp ugt i32 %N, 2
@ -23,19 +26,19 @@
%test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
br i1 %test, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry, %not.preheader
while.body.preheader: ; preds = %not.preheader, %entry
%scevgep = getelementptr i16, i16* %a, i32 -1
%scevgep3 = getelementptr i16, i16* %b, i32 -1
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
while.body: ; preds = %while.body, %while.body.preheader
%lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ]
%lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ]
%count = phi i32 [ %count.next, %while.body ], [ %N, %while.body.preheader ]
%scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
%scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
%load = load i16, i16* %scevgep6, align 2
store i16 %load, i16* %scevgep2, align 2
%scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
%scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
%load = load i16, i16* %scevgep4, align 2
store i16 %load, i16* %scevgep7, align 2
%count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
%cmp1 = icmp ne i32 %count.next, 0
%scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
@ -46,13 +49,8 @@
ret void
}
; Function Attrs: noduplicate nounwind
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { noduplicate nounwind }
@ -67,7 +65,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -107,6 +105,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -118,21 +117,24 @@ body: |
bb.1.not.preheader:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
liveins: $lr, $r0, $r1
t2WhileLoopStart renamable $lr, %bb.4, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
bb.2.while.body.preheader:
successors: %bb.3(0x80000000)
liveins: $lr, $r0, $r1
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
bb.3.while.body:
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6)
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2)
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4)
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr
tB %bb.4, 14, $noreg

View File

@ -13,6 +13,9 @@
# CHECK-NEXT: tB %bb.3, 14
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) #0 {
entry:
%0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
@ -23,15 +26,15 @@
%scevgep5 = getelementptr i32, i32* %b, i32 -1
br label %do.body
do.body: ; preds = %do.body.preheader, %do.body
do.body: ; preds = %do.body, %do.body.preheader
%lsr.iv6 = phi i32* [ %scevgep5, %do.body.preheader ], [ %scevgep7, %do.body ]
%lsr.iv = phi i32* [ %scevgep2, %do.body.preheader ], [ %scevgep3, %do.body ]
%1 = phi i32 [ %2, %do.body ], [ %N, %do.body.preheader ]
%scevgep8 = getelementptr i32, i32* %lsr.iv6, i32 1
%scevgep4 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep = getelementptr i32, i32* %lsr.iv6, i32 1
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%size = call i32 @llvm.arm.space(i32 4096, i32 undef)
%tmp = load i32, i32* %scevgep8, align 4
store i32 %tmp, i32* %scevgep4, align 4
%tmp = load i32, i32* %scevgep, align 4
store i32 %tmp, i32* %scevgep1, align 4
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
%3 = icmp ne i32 %2, 0
%scevgep3 = getelementptr i32, i32* %lsr.iv, i32 1
@ -51,9 +54,6 @@
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { "target-features"="+lob" }
attributes #1 = { nounwind }
attributes #2 = { noduplicate nounwind }
@ -67,7 +67,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -107,6 +107,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.3(0x40000000)
liveins: $r1, $r2, $r3, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -117,6 +118,7 @@ body: |
bb.1.do.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r1, $r2, $r3
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
@ -124,10 +126,11 @@ body: |
bb.2.do.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
dead renamable $r2 = SPACE 4096, undef renamable $r0
renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep8)
early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep)
early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep1)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
@ -136,4 +139,3 @@ body: |
tPOP_RET 14, $noreg, def $r7, def $pc
...

View File

@ -0,0 +1,124 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
# CHECK: $lr = t2DLS $r0
# CHECK: $lr = tMOVr $r0, 14
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
# TODO: Explore the preheader to remove the redundant tMOVr
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
; Reference IR for the MIR body in this file: copies i32 elements from %q to
; %p. The trip count %n is registered with llvm.set.loop.iterations and the
; loop counter is stepped down by 1 each iteration through
; llvm.loop.decrement.reg; the loop exits once the counter reaches zero.
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
; Both pointers are biased by -1 element because the loop advances them by +1
; before each access.
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %preheader
; Separate block between the loop setup and the loop header.
preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %preheader
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
; %0 is the remaining-iterations counter.
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %while.body, label %while.end
while.end: ; preds = %while.body
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: do_copy
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
; Scenario: the prologue pushes LR and marks it killed, and LR is not read
; again before the preheader overwrites it. The CHECK lines at the top of this
; file expect t2DoLoopStart to be turned into `$lr = t2DLS $r0`.
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
; LR is saved to the stack and killed here.
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
; Loop-start pseudo under test; its operand is $r0.
t2DoLoopStart $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
bb.1.preheader:
successors: %bb.2(0x80000000)
liveins: $r0
; Copy into LR ahead of the loop. The CHECK lines expect this tMOVr to still
; be present in the output (see the TODO in the file header).
$lr = tMOVr $r0, 14, $noreg
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
; Decrement/end pseudos on LR; the CHECK lines expect a single
; `$lr = t2LEUpdate renamable $lr, %bb.2` in the output.
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -0,0 +1,122 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
# CHECK-NOT: $lr = t2DLS
# CHECK: $lr = tMOVr $r0, 14
# CHECK-NOT: $lr = t2LEUpdate
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
; Reference IR for the MIR body in this file: copies i32 elements from %q to
; %p. The trip count %n is registered with llvm.set.loop.iterations and the
; loop counter is stepped down by 1 each iteration through
; llvm.loop.decrement.reg; the loop exits once the counter reaches zero.
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
; Both pointers are biased by -1 element because the loop advances them by +1
; before each access.
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %preheader
; Separate block between the loop setup and the loop header.
preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %preheader
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
; %0 is the remaining-iterations counter.
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %while.body, label %while.end
while.end: ; preds = %while.body
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: do_copy
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
; Scenario: the prologue push does NOT include LR, and LR is listed as a
; live-in of the preheader. The CHECK-NOT lines at the top of this file expect
; that no t2DLS and no t2LEUpdate are produced, while the tMOVr into LR stays.
; Presumably the pass reverts because defining LR at the loop start would
; clobber a value that is still live -- confirm against the pass.
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
; Only r7 is pushed; LR is neither saved nor killed.
frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
; NOTE(review): CFI still records an offset for $lr although the push above
; does not store it -- looks like a leftover from the LR-saving variant of
; this test; confirm it is intentional.
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
; Loop-start pseudo under test; its operand is $r0.
t2DoLoopStart $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
bb.1.preheader:
successors: %bb.2(0x80000000)
; LR is live into this block, unlike the variant where LR is pushed.
liveins: $r0, $lr
$lr = tMOVr $r0, 14, $noreg
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
; Decrement/end pseudos on LR; the CHECK-NOT lines require that these do not
; become t2LEUpdate.
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -0,0 +1,122 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
# CHECK-NOT: $lr = t2DLS
# CHECK: $lr = tMOVr $r0, 14
# CHECK-NOT: $lr = t2LEUpdate
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
; Reference IR for the MIR body in this file: copies i32 elements from %q to
; %p. The trip count %n is registered with llvm.set.loop.iterations and the
; loop counter is stepped down by 1 each iteration through
; llvm.loop.decrement.reg; the loop exits once the counter reaches zero.
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
; Both pointers are biased by -1 element because the loop advances them by +1
; before each access.
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %preheader
; Separate block between the loop setup and the loop header.
preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %preheader
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
; %0 is the remaining-iterations counter.
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
store i32 %1, i32* %scevgep2, align 4
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%3 = icmp ne i32 %2, 0
br i1 %3, label %while.body, label %while.end
while.end: ; preds = %while.body
ret i32 0
}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: do_copy
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
; Scenario: LR is not pushed by the prologue and is read (and killed) by the
; t2SUBri that directly follows t2DoLoopStart. The CHECK-NOT lines at the top
; of this file expect that no t2DLS and no t2LEUpdate are produced, while the
; tMOVr into LR stays. Presumably the pass reverts because writing LR at the
; loop start would clobber the value t2SUBri consumes -- confirm against the
; pass.
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0, $r1, $r2, $r7, $lr
; Only r7 is pushed; LR is neither saved nor killed.
frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
; NOTE(review): CFI still records an offset for $lr although the push above
; does not store it -- looks like a leftover from the LR-saving variant of
; this test; confirm it is intentional.
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
; Loop-start pseudo under test; its operand is $r0.
t2DoLoopStart $r0
; Use of LR after the loop start -- the distinguishing instruction of this test.
renamable $r0 = t2SUBri killed renamable $lr, 4, 14, $noreg, def $cpsr
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
bb.1.preheader:
successors: %bb.2(0x80000000)
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
; Decrement/end pseudos on LR; the CHECK-NOT lines require that these do not
; become t2LEUpdate.
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
...

View File

@ -3,8 +3,6 @@
# CHECK-NOT: WhileLoopStart
--- |
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.ll'
source_filename = "while-size-limit.ll"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
@ -47,8 +45,10 @@
; Function Attrs: nounwind
declare i32 @llvm.arm.space(i32 immarg, i32) #1
; Function Attrs: noduplicate nounwind
declare i1 @llvm.test.set.loop.iterations.i32(i32) #2
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
@ -65,7 +65,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -130,6 +130,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -192,5 +193,3 @@ body: |
tB %bb.2, 14, $noreg
...

View File

@ -10,8 +10,6 @@
# CHECK: $lr = t2LEUpdate renamable $lr
--- |
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll'
source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main"
@ -25,14 +23,14 @@
%scevgep3 = getelementptr i16, i16* %b, i32 -1
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
while.body: ; preds = %while.body, %while.body.preheader
%lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ]
%lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ]
%1 = phi i32 [ %3, %while.body ], [ %N, %while.body.preheader ]
%scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
%scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
%2 = load i16, i16* %scevgep6, align 2, !tbaa !2
store i16 %2, i16* %scevgep2, align 2, !tbaa !2
%scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
%scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
%2 = load i16, i16* %scevgep4, align 2
store i16 %2, i16* %scevgep7, align 2
%3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
%4 = icmp ne i32 %3, 0
%scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
@ -48,15 +46,6 @@
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"min_enum_size", i32 4}
!2 = !{!3, !3, i64 0}
!3 = !{!"short", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
...
---
@ -67,7 +56,7 @@ legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
@ -107,6 +96,7 @@ machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.3(0x40000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
@ -117,6 +107,7 @@ body: |
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
@ -124,9 +115,10 @@ body: |
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6, !tbaa !2)
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2, !tbaa !2)
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4)
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
@ -135,4 +127,3 @@ body: |
tPOP_RET 14, $noreg, def $r7, def $pc
...