1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[ARM][LowOverheadLoops] Revert after read/write

Currently we check whether LR is stored or loaded in between the
loop decrement and loop end pseudo instructions. There are two problems
here:
- It relies on all load/store instructions being labelled as such in
  tablegen.
- Any use of the loop decrement's result is troublesome, because the
  decremented value doesn't actually exist at that point!
    
So we need to check for any read/write of LR that occurs between the
two instructions and revert if we find anything.

Differential Revision: https://reviews.llvm.org/D65792

llvm-svn: 368130
This commit is contained in:
Sam Parker 2019-08-07 07:39:19 +00:00
parent b9b91527c5
commit c289d4459d
3 changed files with 274 additions and 13 deletions

View File

@ -11,8 +11,7 @@
/// The expectation is that the loop contains three pseudo instructions: /// The expectation is that the loop contains three pseudo instructions:
/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
/// form should be in the preheader, whereas the while form should be in the /// form should be in the preheader, whereas the while form should be in the
/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the /// preheaders only predecessor.
/// pre-preheader?
/// - t2LoopDec - placed within in the loop body. /// - t2LoopDec - placed within in the loop body.
/// - t2LoopEnd - the loop latch terminator. /// - t2LoopEnd - the loop latch terminator.
/// ///
@ -176,19 +175,25 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
// faster than performing a sub,cmp,br or even subs,br. // faster than performing a sub,cmp,br or even subs,br.
Revert = true; Revert = true;
if (!Dec) if (!Dec || End)
continue; continue;
// If we find that we load/store LR between LoopDec and LoopEnd, expect // If we find that LR has been written or read between LoopDec and
// that the decremented value has been spilled to the stack. Because // LoopEnd, expect that the decremented value is being used else where.
// this value isn't actually going to be produced until the latch, by LE, // Because this value isn't actually going to be produced until the
// we would need to generate a real sub. The value is also likely to be // latch, by LE, we would need to generate a real sub. The value is also
// reloaded for use of LoopEnd - in which in case we'd need to perform // likely to be copied/reloaded for use of LoopEnd - in which in case
// an add because it gets negated again by LE! The other option is to // we'd need to perform an add because it gets subtracted again by LE!
// then generate the other form of LE which doesn't perform the sub. // The other option is to then generate the other form of LE which doesn't
if (MI.mayLoad() || MI.mayStore()) // perform the sub.
Revert = for (auto &MO : MI.operands()) {
MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR; if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
MO.getReg() == ARM::LR) {
LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
Revert = true;
break;
}
}
} }
if (Dec && End && Revert) if (Dec && End && Revert)

View File

@ -0,0 +1,128 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
# CHECK: while.body:
# CHECK-NOT: t2DLS
# CHECK-NOT: t2LEUpdate
#
# Runs only the arm-low-overhead-loops pass over the machine function in this
# file, in which $lr is read (copied into $r4) between t2LoopDec and t2LoopEnd.
# The negative checks above require that neither t2DLS nor t2LEUpdate shows up
# in the output after the first match of the while.body label.
--- |
; IR module that accompanies the machine function of the same name.
define i32 @mov_between_dec_end(i32 %n) #0 {
entry:
%cmp6 = icmp eq i32 %n, 0
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
; %1 is the result of the loop.decrement.reg call; it feeds %add as well as
; the latch compare, so it has a use besides the loop branch.
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 0
%2 = icmp ne i32 %1, 0
br i1 %2, label %while.body, label %while.end
while.end: ; preds = %while.body, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
ret i32 %res.0.lcssa
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
; #0 is attached to the test function, #1 to the two intrinsic declarations;
; #2 is not referenced by anything in this module.
attributes #0 = { "target-features"="+mve.fp" }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind }
...
---
# Machine function for @mov_between_dec_end.
# frameInfo/stack describe a 16-byte frame made of four 4-byte callee-saved
# spill slots for $lr, $r7, $r5 and $r4.
name: mov_between_dec_end
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
frame-setup CFI_INSTRUCTION offset $r5, -12
frame-setup CFI_INSTRUCTION offset $r4, -16
tCBZ $r0, %bb.4
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
; The preheader copies the trip count from $r0 into $lr and holds the
; t2DoLoopStart pseudo.
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
; The copy below reads $lr after t2LoopDec and before t2LoopEnd; this is the
; use of LR that the test is built around.
renamable $r4 = tMOVr $lr, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
bb.4:
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
...

View File

@ -0,0 +1,128 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
# CHECK: while.body:
# CHECK-NOT: t2DLS
# CHECK-NOT: t2LEUpdate
#
# Runs only the arm-low-overhead-loops pass over the machine function in this
# file, in which $lr is written (copied back from $r4) between t2LoopDec and
# t2LoopEnd. The negative checks above require that neither t2DLS nor
# t2LEUpdate shows up in the output after the first match of the while.body
# label.
--- |
; IR module that accompanies the machine function of the same name.
define i32 @mov_between_dec_end(i32 %n) #0 {
entry:
%cmp6 = icmp eq i32 %n, 0
br i1 %cmp6, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
; %1 is the result of the loop.decrement.reg call; it feeds %add as well as
; the latch compare, so it has a use besides the loop branch.
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 2
%2 = icmp ne i32 %1, 0
br i1 %2, label %while.body, label %while.end
while.end: ; preds = %while.body, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
ret i32 %res.0.lcssa
}
; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
; #0 is attached to the test function, #1 to the two intrinsic declarations;
; #2 is not referenced by anything in this module.
attributes #0 = { "target-features"="+mve.fp" }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind }
...
---
# Machine function for @mov_between_dec_end.
# frameInfo/stack describe a 16-byte frame made of four 4-byte callee-saved
# spill slots for $lr, $r7, $r5 and $r4.
name: mov_between_dec_end
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
frame-setup CFI_INSTRUCTION offset $r5, -12
frame-setup CFI_INSTRUCTION offset $r4, -16
tCBZ $r0, %bb.4
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
; The preheader copies the trip count from $r0 into $lr and holds the
; t2DoLoopStart pseudo.
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; $lr is copied into $r4 ahead of the decrement.
$r4 = tMOVr $lr, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
; $lr is overwritten from $r4 after t2LoopDec and before t2LoopEnd; this is
; the write to LR that the test is built around.
$lr = tMOVr $r4, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.3.while.end:
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
bb.4:
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = tMOVr killed $r4, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
...