1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[ARM] Fix the operand used for WLS in ARMLowOverheadLoops

The Loop start instruction handled by the ARMLowOverheadLoops are:
$lr = t2DoLoopStart $r0
$lr = t2DoLoopStartTP $r1, $r0
$lr = t2WhileLoopStartLR $r0, %bb, implicit-def dead $cpsr
All three of these will have LR as the 0 argument, the trip count as the
1 argument.

This patch updated a few places in ARMLowOverheadLoops where the 0th arg
was being used for t2WhileLoopStartLR instructions as the trip count.
One place was entirely removed as it does not seem valid any more, the
case the code is trying to protect against should not be able to occur
with our correct-by-construction low overhead loops.

Differential Revision: https://reviews.llvm.org/D102620
This commit is contained in:
David Green 2021-05-21 09:29:30 +01:00
parent ebb8c67ccd
commit c47364d339
4 changed files with 16 additions and 51 deletions

View File

@ -630,25 +630,6 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return false;
}
// Check that creating a [W|D]LSTP, which will define LR with an element
// count instead of iteration count, won't affect any other instructions
// than the LoopStart and LoopDec.
// TODO: We should try to insert the [W|D]LSTP after any of the other uses.
Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
: Start->getOperand(0).getReg();
if (StartInsertPt == Start && StartReg == ARM::LR) {
if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
SmallPtrSet<MachineInstr *, 2> Uses;
RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
for (auto *Use : Uses) {
if (Use != Start && Use != Dec) {
LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
return false;
}
}
}
}
// For tail predication, we need to provide the number of elements, instead
// of the iteration count, to the loop start instruction. The number of
// elements is provided to the vctp instruction, so we need to check that
@ -1410,8 +1391,7 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
MachineInstr *Def =
RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 1);
if (!Def) {
LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
return;

View File

@ -142,21 +142,18 @@ body: |
; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: liveins: $r0, $r1, $r2
; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.do.end:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
@ -246,21 +243,18 @@ body: |
; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: liveins: $r0, $r1, $r2
; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.do.end:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:

View File

@ -146,8 +146,6 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16
; CHECK: renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.5)
; CHECK: renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r5, dead $cpsr = tADDi3 renamable $r4, 3, 14 /* CC::al */, $noreg
; CHECK: dead renamable $r5, dead $cpsr = tLSRri killed renamable $r5, 2, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_WLSTP_32 killed renamable $r4, %bb.3
; CHECK: bb.1.for.body.lr.ph:
; CHECK: successors: %bb.2(0x80000000)

View File

@ -325,20 +325,13 @@ define void @twoloops(i32* %X, i32 %n, i32 %m) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r1, r2, #15
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r1, r1, #16
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: lsr.w lr, r1, #4
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r12, lr
; CHECK-NEXT: wls lr, lr, .LBB13_2
; CHECK-NEXT: wlstp.8 lr, r3, .LBB13_2
; CHECK-NEXT: .LBB13_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r3
; CHECK-NEXT: subs r3, #16
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrbt.8 q0, [r1], #16
; CHECK-NEXT: le lr, .LBB13_1
; CHECK-NEXT: vstrb.8 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB13_1
; CHECK-NEXT: .LBB13_2: @ %entry
; CHECK-NEXT: wlstp.8 lr, r2, .LBB13_4
; CHECK-NEXT: .LBB13_3: @ =>This Inner Loop Header: Depth=1