mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[ARM][LowOverheadLoops] Add LR def safety check
Converting the *LoopStart pseudo instructions into DLS/WLS results in LR being defined. These instructions were inserted on the assumption that LR would already contain the loop counter, because a mov is introduced during ISel as the consumers in the loop can only use LR. That assumption proved wrong! So perform a safety check, finding an appropriate place to insert the DLS/WLS instructions, or revert if this isn't possible. Differential Revision: https://reviews.llvm.org/D67539 llvm-svn: 372111
This commit is contained in:
parent
baf8b97a90
commit
3151c652a4
@ -34,6 +34,7 @@ using namespace llvm;
|
||||
namespace {
|
||||
|
||||
class ARMLowOverheadLoops : public MachineFunctionPass {
|
||||
MachineFunction *MF = nullptr;
|
||||
const ARMBaseInstrInfo *TII = nullptr;
|
||||
MachineRegisterInfo *MRI = nullptr;
|
||||
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
|
||||
@ -51,19 +52,6 @@ namespace {
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
bool ProcessLoop(MachineLoop *ML);
|
||||
|
||||
bool RevertNonLoops(MachineFunction &MF);
|
||||
|
||||
void RevertWhile(MachineInstr *MI) const;
|
||||
|
||||
void RevertLoopDec(MachineInstr *MI) const;
|
||||
|
||||
void RevertLoopEnd(MachineInstr *MI) const;
|
||||
|
||||
void Expand(MachineLoop *ML, MachineInstr *Start,
|
||||
MachineInstr *Dec, MachineInstr *End, bool Revert);
|
||||
|
||||
MachineFunctionProperties getRequiredProperties() const override {
|
||||
return MachineFunctionProperties().set(
|
||||
MachineFunctionProperties::Property::NoVRegs);
|
||||
@ -72,6 +60,24 @@ namespace {
|
||||
StringRef getPassName() const override {
|
||||
return ARM_LOW_OVERHEAD_LOOPS_NAME;
|
||||
}
|
||||
|
||||
private:
|
||||
bool ProcessLoop(MachineLoop *ML);
|
||||
|
||||
MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
|
||||
|
||||
bool RevertNonLoops();
|
||||
|
||||
void RevertWhile(MachineInstr *MI) const;
|
||||
|
||||
void RevertLoopDec(MachineInstr *MI) const;
|
||||
|
||||
void RevertLoopEnd(MachineInstr *MI) const;
|
||||
|
||||
void Expand(MachineLoop *ML, MachineInstr *Start,
|
||||
MachineInstr *InsertPt, MachineInstr *Dec,
|
||||
MachineInstr *End, bool Revert);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@ -80,26 +86,28 @@ char ARMLowOverheadLoops::ID = 0;
|
||||
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
|
||||
false, false)
|
||||
|
||||
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
|
||||
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
|
||||
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
|
||||
if (!ST.hasLOB())
|
||||
return false;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
|
||||
MF = &mf;
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
|
||||
|
||||
auto &MLI = getAnalysis<MachineLoopInfo>();
|
||||
MRI = &MF.getRegInfo();
|
||||
TII = static_cast<const ARMBaseInstrInfo*>(
|
||||
MF.getSubtarget().getInstrInfo());
|
||||
BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
|
||||
MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
|
||||
MRI = &MF->getRegInfo();
|
||||
TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
|
||||
BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
|
||||
BBUtils->computeAllBlockSizes();
|
||||
BBUtils->adjustBBOffsetsAfter(&MF.front());
|
||||
BBUtils->adjustBBOffsetsAfter(&MF->front());
|
||||
|
||||
bool Changed = false;
|
||||
for (auto ML : MLI) {
|
||||
if (!ML->getParentLoop())
|
||||
Changed |= ProcessLoop(ML);
|
||||
}
|
||||
Changed |= RevertNonLoops(MF);
|
||||
Changed |= RevertNonLoops();
|
||||
return Changed;
|
||||
}
|
||||
|
||||
@ -108,6 +116,100 @@ static bool IsLoopStart(MachineInstr &MI) {
|
||||
MI.getOpcode() == ARM::t2WhileLoopStart;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) {
|
||||
for(auto &MI : make_range(T(Begin), End)) {
|
||||
for (auto &MO : MI.operands()) {
|
||||
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
|
||||
continue;
|
||||
return &MI;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Walk forward from Begin up to (but not including) End and return the first
// instruction that has a register operand using Reg. Returns nullptr if no
// such instruction is found.
static MachineInstr *SearchForUse(MachineInstr *Begin,
                                  MachineBasicBlock::iterator End,
                                  unsigned Reg) {
  for (MachineBasicBlock::iterator It = MachineBasicBlock::iterator(Begin);
       It != End; ++It) {
    for (auto &Operand : It->operands()) {
      // Only a register operand that is a use of Reg counts as a match.
      if (Operand.isReg() && Operand.isUse() && Operand.getReg() == Reg)
        return &*It;
    }
  }
  return nullptr;
}
|
||||
|
||||
// Is it safe to define LR with DLS/WLS?
|
||||
// LR can be defined if it is the operand to Start, because it's the same value,
|
||||
// or if it's going to be equivalent to the operand to Start.
|
||||
// Returns the instruction at which the DLS/WLS should be inserted: either a
// 'mov lr, <count reg>' found in Start's block or Start itself. Returns
// nullptr when no safe insertion point was found.
MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
|
||||
|
||||
// True when MI is a tMOVr that copies Reg into LR and whose predicate
// operand is ARMCC::AL (always executed).
auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) {
|
||||
return MI->getOpcode() == ARM::tMOVr &&
|
||||
MI->getOperand(0).getReg() == ARM::LR &&
|
||||
MI->getOperand(1).getReg() == Reg &&
|
||||
MI->getOperand(2).getImm() == ARMCC::AL;
|
||||
};
|
||||
|
||||
MachineBasicBlock *MBB = Start->getParent();
|
||||
// Operand 0 of Start is the register holding the count value.
unsigned CountReg = Start->getOperand(0).getReg();
|
||||
// Walk forward and backward in the block to find the closest instructions
|
||||
// that define LR. Then also filter them out if they're not a mov lr.
|
||||
// Both walks begin at Start itself; the reverse walk runs towards the top of
// the block and the forward walk towards the bottom.
MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR);
|
||||
if (PredLRDef && !IsMoveLR(PredLRDef, CountReg))
|
||||
PredLRDef = nullptr;
|
||||
|
||||
MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR);
|
||||
if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg))
|
||||
SuccLRDef = nullptr;
|
||||
|
||||
// We've either found one, two or none mov lr instructions... Now figure out
|
||||
// if they are performing the equivalent mov that the Start instruction will.
|
||||
// Do this by scanning forward and backward to see if there's a def of the
|
||||
// register holding the count value. If we find a suitable def, return it as
|
||||
// the insert point. Later, if InsertPt != Start, then we can remove the
|
||||
// redundant instruction.
|
||||
// The mov after Start is considered first: it is usable only if CountReg is
// not redefined between Start and that mov.
if (SuccLRDef) {
|
||||
MachineBasicBlock::iterator End(SuccLRDef);
|
||||
if (!SearchForDef(Start, End, CountReg)) {
|
||||
return SuccLRDef;
|
||||
} else
|
||||
SuccLRDef = nullptr;
|
||||
}
|
||||
// Otherwise try the mov before Start, with the same requirement that CountReg
// is not redefined between that mov and Start.
// NOTE(review): this path does not distinguish t2WhileLoopStart from
// t2DoLoopStart; confirm that inserting a WLS at an earlier point in the
// block is valid.
if (PredLRDef) {
|
||||
MachineBasicBlock::reverse_iterator End(PredLRDef);
|
||||
if (!SearchForDef(Start, End, CountReg)) {
|
||||
return PredLRDef;
|
||||
} else
|
||||
PredLRDef = nullptr;
|
||||
}
|
||||
|
||||
// We can define LR because LR already contains the same value.
|
||||
if (Start->getOperand(0).getReg() == ARM::LR)
|
||||
return Start;
|
||||
|
||||
// We've found no suitable LR def and Start doesn't use LR directly. Can we
|
||||
// just define LR anyway?
|
||||
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
|
||||
LivePhysRegs LiveRegs(*TRI);
|
||||
LiveRegs.addLiveOuts(*MBB);
|
||||
|
||||
// Not if we haven't found a suitable mov and LR is live out.
|
||||
if (LiveRegs.contains(ARM::LR))
|
||||
return nullptr;
|
||||
|
||||
// If LR is not live out, we can insert the instruction if nothing else
|
||||
// uses LR after it.
|
||||
if (!SearchForUse(Start, MBB->end(), ARM::LR))
|
||||
return Start;
|
||||
|
||||
// Reached only when LR is not live out of the block but some instruction
// from Start onwards still uses it.
LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
|
||||
<< " LR\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
||||
|
||||
bool Changed = false;
|
||||
@ -169,11 +271,13 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
||||
End = &MI;
|
||||
else if (IsLoopStart(MI))
|
||||
Start = &MI;
|
||||
else if (MI.getDesc().isCall())
|
||||
else if (MI.getDesc().isCall()) {
|
||||
// TODO: Though the call will require LE to execute again, does this
|
||||
// mean we should revert? Always executing LE hopefully should be
|
||||
// faster than performing a sub,cmp,br or even subs,br.
|
||||
Revert = true;
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
|
||||
}
|
||||
|
||||
if (!Dec || End)
|
||||
continue;
|
||||
@ -237,7 +341,14 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
||||
Revert = true;
|
||||
}
|
||||
|
||||
Expand(ML, Start, Dec, End, Revert);
|
||||
MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
|
||||
if (!InsertPt) {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
|
||||
Revert = true;
|
||||
} else
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
|
||||
|
||||
Expand(ML, Start, InsertPt, Dec, End, Revert);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -304,33 +415,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
|
||||
}
|
||||
|
||||
void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
|
||||
MachineInstr *InsertPt,
|
||||
MachineInstr *Dec, MachineInstr *End,
|
||||
bool Revert) {
|
||||
|
||||
auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
|
||||
// The trip count should already been held in LR since the instructions
|
||||
// within the loop can only read and write to LR. So, there should be a
|
||||
// mov to setup the count. WLS/DLS perform this move, so find the original
|
||||
// and delete it - inserting WLS/DLS in its place.
|
||||
MachineBasicBlock *MBB = Start->getParent();
|
||||
MachineInstr *InsertPt = Start;
|
||||
for (auto &I : MRI->def_instructions(ARM::LR)) {
|
||||
if (I.getParent() != MBB)
|
||||
continue;
|
||||
|
||||
// Always execute.
|
||||
if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
|
||||
continue;
|
||||
|
||||
// Only handle move reg, if the trip count it will need moving into a reg
|
||||
// before the setup instruction anyway.
|
||||
if (!I.getDesc().isMoveReg() ||
|
||||
!I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
|
||||
continue;
|
||||
InsertPt = &I;
|
||||
break;
|
||||
}
|
||||
|
||||
auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
|
||||
MachineInstr *InsertPt) {
|
||||
MachineBasicBlock *MBB = InsertPt->getParent();
|
||||
unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
|
||||
ARM::t2DLS : ARM::t2WLS;
|
||||
MachineInstrBuilder MIB =
|
||||
@ -389,18 +480,18 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
|
||||
RevertLoopDec(Dec);
|
||||
RevertLoopEnd(End);
|
||||
} else {
|
||||
Start = ExpandLoopStart(ML, Start);
|
||||
Start = ExpandLoopStart(ML, Start, InsertPt);
|
||||
RemoveDeadBranch(Start);
|
||||
End = ExpandLoopEnd(ML, Dec, End);
|
||||
RemoveDeadBranch(End);
|
||||
}
|
||||
}
|
||||
|
||||
bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
|
||||
bool ARMLowOverheadLoops::RevertNonLoops() {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
|
||||
bool Changed = false;
|
||||
|
||||
for (auto &MBB : MF) {
|
||||
for (auto &MBB : *MF) {
|
||||
SmallVector<MachineInstr*, 4> Starts;
|
||||
SmallVector<MachineInstr*, 4> Decs;
|
||||
SmallVector<MachineInstr*, 4> Ends;
|
||||
|
@ -4,6 +4,9 @@
|
||||
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
|
||||
entry:
|
||||
%scevgep = getelementptr i32, i32* %q, i32 -1
|
||||
@ -15,10 +18,10 @@
|
||||
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
|
||||
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
|
||||
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
|
||||
%scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep7, align 4
|
||||
store i32 %1, i32* %scevgep4, align 4
|
||||
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep6, align 4
|
||||
store i32 %1, i32* %scevgep2, align 4
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
@ -44,7 +47,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -84,6 +87,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -96,9 +100,10 @@ body: |
|
||||
|
||||
bb.1.while.body:
|
||||
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
|
||||
tB %bb.2, 14, $noreg
|
||||
@ -108,4 +113,3 @@ body: |
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
||||
|
@ -9,7 +9,10 @@
|
||||
# CHECK: bb.2.for.cond.cleanup:
|
||||
# CHECK: bb.3.for.header:
|
||||
|
||||
--- |
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
|
||||
entry:
|
||||
call void @llvm.set.loop.iterations.i32(i32 %N)
|
||||
@ -45,9 +48,11 @@
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare i32 @llvm.arm.space(i32 immarg, i32) #0
|
||||
declare i32 @llvm.arm.space(i32 immarg, i32) #0
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
@ -63,7 +68,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -128,6 +133,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.3(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r3, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -184,5 +190,3 @@ body: |
|
||||
tB %bb.1, 14, $noreg
|
||||
|
||||
...
|
||||
|
||||
|
||||
|
115
test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir
Normal file
115
test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir
Normal file
@ -0,0 +1,115 @@
|
||||
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
|
||||
# CHECK: $lr = t2DLS $r0
|
||||
# CHECK-NOT: $lr = tMOVr $r0
|
||||
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
|
||||
entry:
|
||||
%scevgep = getelementptr i32, i32* %q, i32 -1
|
||||
%scevgep3 = getelementptr i32, i32* %p, i32 -1
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
|
||||
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
|
||||
%0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
|
||||
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep6, align 4
|
||||
store i32 %1, i32* %scevgep2, align 4
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%3 = icmp ne i32 %2, 0
|
||||
br i1 %3, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #0
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: do_copy
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
- { reg: '$r1', virtual-reg: '' }
|
||||
- { reg: '$r2', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
t2DoLoopStart $r0
|
||||
$lr = tMOVr killed $r0, 14, $noreg
|
||||
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
|
||||
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
|
||||
|
||||
bb.1.while.body:
|
||||
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
|
||||
tB %bb.2, 14, $noreg
|
||||
|
||||
bb.2.while.end:
|
||||
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
@ -5,8 +5,6 @@
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll'
|
||||
source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll"
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
@ -35,15 +33,9 @@
|
||||
|
||||
declare i32 @bar(...) local_unnamed_addr #0
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #2
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
attributes #1 = { noduplicate nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
@ -57,7 +49,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -101,6 +93,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
liveins: $r0, $r4, $r5, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
@ -112,6 +105,7 @@ body: |
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
@ -119,6 +113,7 @@ body: |
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r4
|
||||
|
||||
$r5 = tMOVr killed $lr, 14, $noreg
|
||||
tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
|
||||
@ -129,6 +124,8 @@ body: |
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
liveins: $r4
|
||||
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
@ -138,4 +135,3 @@ body: |
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
||||
|
@ -4,7 +4,10 @@
|
||||
# CHECK-NOT: t2DLS
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
@ -15,7 +18,6 @@
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%add = add i32 %1, 0
|
||||
@ -27,10 +29,7 @@
|
||||
ret i32 %res.0.lcssa
|
||||
}
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
@ -46,7 +45,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -56,11 +55,11 @@ frameInfo:
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
@ -78,51 +77,46 @@ stack:
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
liveins: $r0, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||
tCBZ $r0, %bb.4
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
t2DoLoopStart killed $r0
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r4
|
||||
|
||||
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
|
||||
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r4, 14, $noreg
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
renamable $r4 = tMOVr $lr, 14, $noreg
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
liveins: $lr
|
||||
|
||||
$r0 = tMOVr killed $lr, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
bb.4:
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
renamable $lr = t2MOVi 0, 14, $noreg, $noreg
|
||||
$r0 = tMOVr killed $lr, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
||||
|
@ -1,136 +0,0 @@
|
||||
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
|
||||
|
||||
# CHECK: while.body:
|
||||
# CHECK-NOT: t2DLS
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
define i32 @skip_spill(i32 %n) #0 {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
br i1 %cmp6, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||
%call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
|
||||
%add = add nsw i32 %call, %res.07
|
||||
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%2 = icmp ne i32 %1, 0
|
||||
br i1 %2, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
|
||||
ret i32 %res.0.lcssa
|
||||
}
|
||||
|
||||
declare i32 @bar(...) local_unnamed_addr #0
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #2
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
attributes #1 = { noduplicate nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: skip_spill
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||
tCBZ $r0, %bb.4
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
t2DoLoopStart killed $r0
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
|
||||
$r5 = tMOVr killed $lr, 14, $noreg
|
||||
tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
|
||||
$lr = tMOVr killed $r5, 14, $noreg
|
||||
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
bb.4:
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
@ -4,7 +4,10 @@
|
||||
# CHECK-NOT: t2DLS
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
@ -15,7 +18,6 @@
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%add = add i32 %1, 2
|
||||
@ -33,6 +35,9 @@
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #2
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
attributes #1 = { noduplicate nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
@ -46,7 +51,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -56,11 +61,11 @@ frameInfo:
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
@ -78,51 +83,45 @@ stack:
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
liveins: $r0, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||
tCBZ $r0, %bb.4
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
t2DoLoopStart killed $r0
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr
|
||||
|
||||
$r4 = tMOVr $lr, 14, $noreg
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
renamable $r0 = t2ADDri renamable $lr, 2, 14, $noreg, $noreg
|
||||
$lr = tMOVr $r4, 14, $noreg
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
liveins: $r0
|
||||
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
bb.4:
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
||||
|
@ -14,6 +14,9 @@
|
||||
# CHECK: bb.4.while.end:
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) {
|
||||
entry:
|
||||
%cmp = icmp ugt i32 %N, 2
|
||||
@ -23,19 +26,19 @@
|
||||
%test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
|
||||
br i1 %test, label %while.body.preheader, label %while.end
|
||||
|
||||
while.body.preheader: ; preds = %entry, %not.preheader
|
||||
while.body.preheader: ; preds = %not.preheader, %entry
|
||||
%scevgep = getelementptr i16, i16* %a, i32 -1
|
||||
%scevgep3 = getelementptr i16, i16* %b, i32 -1
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ]
|
||||
%lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ]
|
||||
%count = phi i32 [ %count.next, %while.body ], [ %N, %while.body.preheader ]
|
||||
%scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
%scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
|
||||
%load = load i16, i16* %scevgep6, align 2
|
||||
store i16 %load, i16* %scevgep2, align 2
|
||||
%scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
%scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
|
||||
%load = load i16, i16* %scevgep4, align 2
|
||||
store i16 %load, i16* %scevgep7, align 2
|
||||
%count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
|
||||
%cmp1 = icmp ne i32 %count.next, 0
|
||||
%scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
@ -46,13 +49,8 @@
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
@ -67,7 +65,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -107,6 +105,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -118,21 +117,24 @@ body: |
|
||||
|
||||
bb.1.not.preheader:
|
||||
successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
t2WhileLoopStart renamable $lr, %bb.4, implicit-def dead $cpsr
|
||||
tB %bb.2, 14, $noreg
|
||||
|
||||
bb.2.while.body.preheader:
|
||||
successors: %bb.3(0x80000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
|
||||
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
|
||||
|
||||
bb.3.while.body:
|
||||
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2)
|
||||
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4)
|
||||
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr
|
||||
tB %bb.4, 14, $noreg
|
||||
|
@ -13,6 +13,9 @@
|
||||
# CHECK-NEXT: tB %bb.3, 14
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) #0 {
|
||||
entry:
|
||||
%0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
|
||||
@ -23,15 +26,15 @@
|
||||
%scevgep5 = getelementptr i32, i32* %b, i32 -1
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.body.preheader, %do.body
|
||||
do.body: ; preds = %do.body, %do.body.preheader
|
||||
%lsr.iv6 = phi i32* [ %scevgep5, %do.body.preheader ], [ %scevgep7, %do.body ]
|
||||
%lsr.iv = phi i32* [ %scevgep2, %do.body.preheader ], [ %scevgep3, %do.body ]
|
||||
%1 = phi i32 [ %2, %do.body ], [ %N, %do.body.preheader ]
|
||||
%scevgep8 = getelementptr i32, i32* %lsr.iv6, i32 1
|
||||
%scevgep4 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep = getelementptr i32, i32* %lsr.iv6, i32 1
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%size = call i32 @llvm.arm.space(i32 4096, i32 undef)
|
||||
%tmp = load i32, i32* %scevgep8, align 4
|
||||
store i32 %tmp, i32* %scevgep4, align 4
|
||||
%tmp = load i32, i32* %scevgep, align 4
|
||||
store i32 %tmp, i32* %scevgep1, align 4
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
|
||||
%3 = icmp ne i32 %2, 0
|
||||
%scevgep3 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
@ -51,9 +54,6 @@
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { "target-features"="+lob" }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { noduplicate nounwind }
|
||||
@ -67,7 +67,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -107,6 +107,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x40000000), %bb.3(0x40000000)
|
||||
liveins: $r1, $r2, $r3, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -117,6 +118,7 @@ body: |
|
||||
|
||||
bb.1.do.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r1, $r2, $r3
|
||||
|
||||
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
|
||||
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
|
||||
@ -124,10 +126,11 @@ body: |
|
||||
|
||||
bb.2.do.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
dead renamable $r2 = SPACE 4096, undef renamable $r0
|
||||
renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep8)
|
||||
early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
|
||||
renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep)
|
||||
early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep1)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
@ -136,4 +139,3 @@ body: |
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc
|
||||
|
||||
...
|
||||
|
||||
|
124
test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
Normal file
124
test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
Normal file
@ -0,0 +1,124 @@
|
||||
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
|
||||
# CHECK: $lr = t2DLS $r0
|
||||
# CHECK: $lr = tMOVr $r0, 14
|
||||
# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
|
||||
|
||||
# TODO: Explore the preheader to remove the redundant tMOVr
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
|
||||
entry:
|
||||
%scevgep = getelementptr i32, i32* %q, i32 -1
|
||||
%scevgep3 = getelementptr i32, i32* %p, i32 -1
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %preheader
|
||||
|
||||
preheader:
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
|
||||
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
|
||||
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
|
||||
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep6, align 4
|
||||
store i32 %1, i32* %scevgep2, align 4
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%3 = icmp ne i32 %2, 0
|
||||
br i1 %3, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #0
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: do_copy
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
- { reg: '$r1', virtual-reg: '' }
|
||||
- { reg: '$r2', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
t2DoLoopStart $r0
|
||||
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
|
||||
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
|
||||
|
||||
bb.1.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
122
test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir
Normal file
122
test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir
Normal file
@ -0,0 +1,122 @@
|
||||
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
|
||||
# CHECK-NOT: $lr = t2DLS
|
||||
# CHECK: $lr = tMOVr $r0, 14
|
||||
# CHECK-NOT: $lr = t2LEUpdate
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
|
||||
entry:
|
||||
%scevgep = getelementptr i32, i32* %q, i32 -1
|
||||
%scevgep3 = getelementptr i32, i32* %p, i32 -1
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %preheader
|
||||
|
||||
preheader:
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
|
||||
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
|
||||
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
|
||||
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep6, align 4
|
||||
store i32 %1, i32* %scevgep2, align 4
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%3 = icmp ne i32 %2, 0
|
||||
br i1 %3, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #0
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: do_copy
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
- { reg: '$r1', virtual-reg: '' }
|
||||
- { reg: '$r2', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
t2DoLoopStart $r0
|
||||
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
|
||||
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
|
||||
|
||||
bb.1.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0, $lr
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
122
test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
Normal file
122
test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
Normal file
@ -0,0 +1,122 @@
|
||||
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
|
||||
# CHECK-NOT: $lr = t2DLS
|
||||
# CHECK: $lr = tMOVr $r0, 14
|
||||
# CHECK-NOT: $lr = t2LEUpdate
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
|
||||
entry:
|
||||
%scevgep = getelementptr i32, i32* %q, i32 -1
|
||||
%scevgep3 = getelementptr i32, i32* %p, i32 -1
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %preheader
|
||||
|
||||
preheader:
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %entry
|
||||
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
|
||||
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
|
||||
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
|
||||
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%1 = load i32, i32* %scevgep6, align 4
|
||||
store i32 %1, i32* %scevgep2, align 4
|
||||
%scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
|
||||
%scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
|
||||
%2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%3 = icmp ne i32 %2, 0
|
||||
br i1 %3, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #0
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: do_copy
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
- { reg: '$r1', virtual-reg: '' }
|
||||
- { reg: '$r2', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
t2DoLoopStart $r0
|
||||
renamable $r0 = t2SUBri killed renamable $lr, 4, 14, $noreg, def $cpsr
|
||||
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
|
||||
|
||||
bb.1.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6)
|
||||
early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
@ -3,8 +3,6 @@
|
||||
# CHECK-NOT: WhileLoopStart
|
||||
|
||||
--- |
|
||||
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.ll'
|
||||
source_filename = "while-size-limit.ll"
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
@ -47,8 +45,10 @@
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare i32 @llvm.arm.space(i32 immarg, i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i1 @llvm.test.set.loop.iterations.i32(i32) #2
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
|
||||
|
||||
@ -65,7 +65,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -130,6 +130,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x80000000)
|
||||
liveins: $r0, $r1, $r2, $r3, $r4, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -192,5 +193,3 @@ body: |
|
||||
tB %bb.2, 14, $noreg
|
||||
|
||||
...
|
||||
|
||||
|
||||
|
@ -10,8 +10,6 @@
|
||||
# CHECK: $lr = t2LEUpdate renamable $lr
|
||||
|
||||
--- |
|
||||
; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll'
|
||||
source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll"
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main"
|
||||
|
||||
@ -25,14 +23,14 @@
|
||||
%scevgep3 = getelementptr i16, i16* %b, i32 -1
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ]
|
||||
%lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ]
|
||||
%1 = phi i32 [ %3, %while.body ], [ %N, %while.body.preheader ]
|
||||
%scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
%scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
|
||||
%2 = load i16, i16* %scevgep6, align 2, !tbaa !2
|
||||
store i16 %2, i16* %scevgep2, align 2, !tbaa !2
|
||||
%scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
%scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
|
||||
%2 = load i16, i16* %scevgep4, align 2
|
||||
store i16 %2, i16* %scevgep7, align 2
|
||||
%3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
|
||||
%4 = icmp ne i32 %3, 0
|
||||
%scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
|
||||
@ -48,15 +46,6 @@
|
||||
|
||||
attributes #0 = { noduplicate nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, !"min_enum_size", i32 4}
|
||||
!2 = !{!3, !3, i64 0}
|
||||
!3 = !{!"short", !4, i64 0}
|
||||
!4 = !{!"omnipotent char", !5, i64 0}
|
||||
!5 = !{!"Simple C/C++ TBAA"}
|
||||
|
||||
...
|
||||
---
|
||||
@ -67,7 +56,7 @@ legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
@ -107,6 +96,7 @@ machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x40000000), %bb.3(0x40000000)
|
||||
liveins: $r0, $r1, $r2, $r7, $lr
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
@ -117,6 +107,7 @@ body: |
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
liveins: $r0, $r1, $r2
|
||||
|
||||
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
|
||||
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
|
||||
@ -124,9 +115,10 @@ body: |
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
liveins: $lr, $r0, $r1
|
||||
|
||||
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6, !tbaa !2)
|
||||
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2, !tbaa !2)
|
||||
renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4)
|
||||
early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
@ -135,4 +127,3 @@ body: |
|
||||
tPOP_RET 14, $noreg, def $r7, def $pc
|
||||
|
||||
...
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user