mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[ARM][LowOverheadLoops] Revert after read/write
Currently we check whether LR is stored/loaded to/from inbetween the loop decrement and loop end pseudo instructions. There's two problems here: - It relies on all load/store instructions being labelled as such in tablegen. - Actually any use of loop decrement is troublesome because the value doesn't exist! So we need to check for any read/write of LR that occurs between the two instructions and revert if we find anything. Differential Revision: https://reviews.llvm.org/D65792 llvm-svn: 368130
This commit is contained in:
parent
b9b91527c5
commit
c289d4459d
@ -11,8 +11,7 @@
|
|||||||
/// The expectation is that the loop contains three pseudo instructions:
|
/// The expectation is that the loop contains three pseudo instructions:
|
||||||
/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
|
/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
|
||||||
/// form should be in the preheader, whereas the while form should be in the
|
/// form should be in the preheader, whereas the while form should be in the
|
||||||
/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
|
/// preheaders only predecessor.
|
||||||
/// pre-preheader?
|
|
||||||
/// - t2LoopDec - placed within in the loop body.
|
/// - t2LoopDec - placed within in the loop body.
|
||||||
/// - t2LoopEnd - the loop latch terminator.
|
/// - t2LoopEnd - the loop latch terminator.
|
||||||
///
|
///
|
||||||
@ -176,19 +175,25 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
|||||||
// faster than performing a sub,cmp,br or even subs,br.
|
// faster than performing a sub,cmp,br or even subs,br.
|
||||||
Revert = true;
|
Revert = true;
|
||||||
|
|
||||||
if (!Dec)
|
if (!Dec || End)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// If we find that we load/store LR between LoopDec and LoopEnd, expect
|
// If we find that LR has been written or read between LoopDec and
|
||||||
// that the decremented value has been spilled to the stack. Because
|
// LoopEnd, expect that the decremented value is being used else where.
|
||||||
// this value isn't actually going to be produced until the latch, by LE,
|
// Because this value isn't actually going to be produced until the
|
||||||
// we would need to generate a real sub. The value is also likely to be
|
// latch, by LE, we would need to generate a real sub. The value is also
|
||||||
// reloaded for use of LoopEnd - in which in case we'd need to perform
|
// likely to be copied/reloaded for use of LoopEnd - in which in case
|
||||||
// an add because it gets negated again by LE! The other option is to
|
// we'd need to perform an add because it gets subtracted again by LE!
|
||||||
// then generate the other form of LE which doesn't perform the sub.
|
// The other option is to then generate the other form of LE which doesn't
|
||||||
if (MI.mayLoad() || MI.mayStore())
|
// perform the sub.
|
||||||
Revert =
|
for (auto &MO : MI.operands()) {
|
||||||
MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
|
if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
|
||||||
|
MO.getReg() == ARM::LR) {
|
||||||
|
LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
|
||||||
|
Revert = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Dec && End && Revert)
|
if (Dec && End && Revert)
|
||||||
|
128
test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
Normal file
128
test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
|
||||||
|
|
||||||
|
# CHECK: while.body:
|
||||||
|
# CHECK-NOT: t2DLS
|
||||||
|
# CHECK-NOT: t2LEUpdate
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||||
|
entry:
|
||||||
|
%cmp6 = icmp eq i32 %n, 0
|
||||||
|
br i1 %cmp6, label %while.end, label %while.body.preheader
|
||||||
|
|
||||||
|
while.body.preheader: ; preds = %entry
|
||||||
|
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||||
|
br label %while.body
|
||||||
|
|
||||||
|
while.body: ; preds = %while.body, %while.body.preheader
|
||||||
|
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||||
|
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||||
|
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||||
|
%add = add i32 %1, 0
|
||||||
|
%2 = icmp ne i32 %1, 0
|
||||||
|
br i1 %2, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.end: ; preds = %while.body, %entry
|
||||||
|
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
|
||||||
|
ret i32 %res.0.lcssa
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noduplicate nounwind
|
||||||
|
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||||
|
|
||||||
|
; Function Attrs: noduplicate nounwind
|
||||||
|
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+mve.fp" }
|
||||||
|
attributes #1 = { noduplicate nounwind }
|
||||||
|
attributes #2 = { nounwind }
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: mov_between_dec_end
|
||||||
|
alignment: 1
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: false
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins:
|
||||||
|
- { reg: '$r0', virtual-reg: '' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 16
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 4
|
||||||
|
adjustsStack: true
|
||||||
|
hasCalls: true
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 0
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack:
|
||||||
|
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||||
|
|
||||||
|
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||||
|
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||||
|
tCBZ $r0, %bb.4
|
||||||
|
|
||||||
|
bb.1.while.body.preheader:
|
||||||
|
successors: %bb.2(0x80000000)
|
||||||
|
|
||||||
|
$lr = tMOVr $r0, 14, $noreg
|
||||||
|
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||||
|
t2DoLoopStart killed $r0
|
||||||
|
|
||||||
|
bb.2.while.body:
|
||||||
|
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||||
|
|
||||||
|
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
|
||||||
|
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||||
|
renamable $r4 = tMOVr $lr, 14, $noreg
|
||||||
|
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||||
|
tB %bb.3, 14, $noreg
|
||||||
|
|
||||||
|
bb.3.while.end:
|
||||||
|
$r0 = tMOVr killed $r4, 14, $noreg
|
||||||
|
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||||
|
|
||||||
|
bb.4:
|
||||||
|
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||||
|
$r0 = tMOVr killed $r4, 14, $noreg
|
||||||
|
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
128
test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
Normal file
128
test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
|
||||||
|
|
||||||
|
# CHECK: while.body:
|
||||||
|
# CHECK-NOT: t2DLS
|
||||||
|
# CHECK-NOT: t2LEUpdate
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||||
|
entry:
|
||||||
|
%cmp6 = icmp eq i32 %n, 0
|
||||||
|
br i1 %cmp6, label %while.end, label %while.body.preheader
|
||||||
|
|
||||||
|
while.body.preheader: ; preds = %entry
|
||||||
|
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||||
|
br label %while.body
|
||||||
|
|
||||||
|
while.body: ; preds = %while.body, %while.body.preheader
|
||||||
|
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||||
|
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||||
|
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||||
|
%add = add i32 %1, 2
|
||||||
|
%2 = icmp ne i32 %1, 0
|
||||||
|
br i1 %2, label %while.body, label %while.end
|
||||||
|
|
||||||
|
while.end: ; preds = %while.body, %entry
|
||||||
|
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
|
||||||
|
ret i32 %res.0.lcssa
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noduplicate nounwind
|
||||||
|
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||||
|
|
||||||
|
; Function Attrs: noduplicate nounwind
|
||||||
|
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+mve.fp" }
|
||||||
|
attributes #1 = { noduplicate nounwind }
|
||||||
|
attributes #2 = { nounwind }
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: mov_between_dec_end
|
||||||
|
alignment: 1
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: false
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins:
|
||||||
|
- { reg: '$r0', virtual-reg: '' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 16
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 4
|
||||||
|
adjustsStack: true
|
||||||
|
hasCalls: true
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 0
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack:
|
||||||
|
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||||
|
|
||||||
|
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||||
|
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||||
|
tCBZ $r0, %bb.4
|
||||||
|
|
||||||
|
bb.1.while.body.preheader:
|
||||||
|
successors: %bb.2(0x80000000)
|
||||||
|
|
||||||
|
$lr = tMOVr $r0, 14, $noreg
|
||||||
|
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||||
|
t2DoLoopStart killed $r0
|
||||||
|
|
||||||
|
bb.2.while.body:
|
||||||
|
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||||
|
|
||||||
|
$r4 = tMOVr $lr, 14, $noreg
|
||||||
|
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||||
|
$lr = tMOVr $r4, 14, $noreg
|
||||||
|
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||||
|
tB %bb.3, 14, $noreg
|
||||||
|
|
||||||
|
bb.3.while.end:
|
||||||
|
$r0 = tMOVr killed $r4, 14, $noreg
|
||||||
|
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||||
|
|
||||||
|
bb.4:
|
||||||
|
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||||
|
$r0 = tMOVr killed $r4, 14, $noreg
|
||||||
|
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
Loading…
Reference in New Issue
Block a user