1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00
llvm-mirror/lib/CodeGen/MachineLoopUtils.cpp
Sjoerd Meijer 78412f523a [ARM][LowOverheadLoops] Remove dead loop update instructions.
After creating a low-overhead loop, the loop update instruction was still
lingering around hurting performance. This removes dead loop update
instructions, which in our case are mostly SUBS instructions.

To support this, some helper functions were added to MachineLoopUtils and
ReachingDefAnalysis to analyse live-ins of loop exit blocks and find uses
before a particular loop instruction, respectively.

This is a first version that removes a SUBS instruction when there are no other
uses inside or outside the loop block, but there are some more interesting
cases in test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll which
show that there is room for improvement. For example, we can't handle this
case yet:

    ..
    dlstp.32  lr, r2
  .LBB0_1:
    mov r3, r2
    subs  r2, #4
    vldrh.u32 q2, [r1], #8
    vmov  q1, q0
    vmla.u32  q0, q2, r0
    letp  lr, .LBB0_1
  @ %bb.2:
    vctp.32 r3
    ..

This is a lot trickier because r2 is used not only by the subs but also by the
mov to r3, and r3 is in turn used outside the low-overhead loop by the vctp
instruction. Handling that requires a somewhat different approach, which I will
follow up on.

Differential Revision: https://reviews.llvm.org/D71007
2019-12-11 10:20:19 +00:00

145 lines
5.2 KiB
C++

//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
namespace {
/// Return the instruction in \p BB that sits at the same position as \p MI
/// does inside its own parent block.
///
/// The lookup is purely positional: it counts how many instructions precede
/// \p MI in its parent (using the instr_iterator, i.e. the same iterator kind
/// on both sides) and advances that many steps from the start of \p BB. The
/// caller is therefore expected to pass a \p BB that is a clone of MI's
/// parent (or the block MI's parent was cloned from), so that both blocks
/// have matching instruction sequences.
MachineInstr &findEquivalentInstruction(MachineInstr &MI,
                                        MachineBasicBlock *BB) {
  MachineBasicBlock *Parent = MI.getParent();
  MachineBasicBlock::instr_iterator Here(MI);
  unsigned Index = std::distance(Parent->instr_begin(), Here);
  return *std::next(BB->instr_begin(), Index);
}
} // namespace
/// Clone the body of the single-block loop \p Loop into a newly created block
/// and splice that block into the CFG next to the loop.
///
/// The block entering the loop (the "preheader") and the block leaving it
/// (the "exit") are taken to be the first predecessor / successor of \p Loop
/// that is not \p Loop itself; only the first two entries of each list are
/// inspected.
///
/// Every virtual register defined by a cloned instruction receives a fresh
/// virtual register of the same register class; physical-register defs are
/// left alone. Uses inside the clone are then rewritten to the fresh
/// registers, and the PHIs of both the clone and \p Loop are patched so that
/// values flow through the new block.
///
/// \param Direction LPD_Front places the new block immediately before
///        \p Loop and routes the preheader through it; otherwise the new
///        block is placed immediately after \p Loop and the loop's exit edge
///        is routed through it.
/// \param Loop the single-block loop to peel.
/// \param MRI used to create and constrain virtual registers and to walk the
///        uses of the original registers.
/// \param TII used to analyze, remove and insert the branch instructions of
///        the affected blocks.
/// \returns the newly created block.
MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
                                             MachineBasicBlock *Loop,
                                             MachineRegisterInfo &MRI,
                                             const TargetInstrInfo *TII) {
  const bool PeelFront = Direction == LPD_Front;
  const bool PeelBack = Direction == LPD_Back;
  MachineFunction &MF = *Loop->getParent();

  // Pick the non-self predecessor and successor; if the first entry is the
  // back-edge, the second entry is used instead.
  auto FirstPred = Loop->pred_begin();
  MachineBasicBlock *Preheader =
      *FirstPred == Loop ? *std::next(FirstPred) : *FirstPred;
  auto FirstSucc = Loop->succ_begin();
  MachineBasicBlock *Exit =
      *FirstSucc == Loop ? *std::next(FirstSucc) : *FirstSucc;

  // Create the block for the peeled iteration and place it directly before
  // (front peel) or directly after (otherwise) the loop in function layout.
  MachineBasicBlock *Peeled = MF.CreateMachineBasicBlock(Loop->getBasicBlock());
  auto LayoutPos = Loop->getIterator();
  if (!PeelFront)
    ++LayoutPos;
  MF.insert(LayoutPos, Peeled);

  // Maps each original virtual register defined in the loop to the register
  // that replaces it in the clone.
  // FIXME: Add DenseMapInfo trait for Register so we can use it as a key.
  DenseMap<unsigned, Register> RegMap;

  // Clone every instruction of the loop, in order, onto the end of the new
  // block, renaming the virtual registers it defines.
  for (MachineInstr &Orig : *Loop) {
    MachineInstr *Clone = MF.CloneMachineInstr(&Orig);
    Peeled->insert(Peeled->end(), Clone);

    for (MachineOperand &Def : Clone->defs()) {
      Register OldReg = Def.getReg();
      if (OldReg.isPhysical())
        continue;

      Register &NewReg = RegMap[OldReg];
      NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
      Def.setReg(NewReg);

      if (!PeelBack)
        continue;

      // Peeling at the back: every use of the original register that is not
      // inside the original loop block must now read the clone's register.
      // The operands are collected first and rewritten afterwards rather
      // than being mutated during the walk.
      // FIXME: is the use_iterator stable enough to mutate register uses
      // while iterating?
      SmallVector<MachineOperand *, 4> OutsideUses;
      for (MachineOperand &Use : MRI.use_operands(OldReg))
        if (Use.getParent()->getParent() != Loop)
          OutsideUses.push_back(&Use);
      for (MachineOperand *Use : OutsideUses) {
        MRI.constrainRegClass(NewReg, MRI.getRegClass(Use->getReg()));
        Use->setReg(NewReg);
      }
    }
  }

  // Rewrite the register uses of all non-PHI clones to the renamed registers.
  for (auto It = Peeled->getFirstNonPHI(); It != Peeled->end(); ++It)
    for (MachineOperand &Op : It->uses()) {
      if (!Op.isReg() || !RegMap.count(Op.getReg()))
        continue;
      Op.setReg(RegMap[Op.getReg()]);
    }

  // Fix up the PHIs. Each PHI is treated as having two (register, block)
  // incoming pairs at operand indices (1, 2) and (3, 4).
  for (auto It = Peeled->begin(); It->isPHI(); ++It) {
    MachineInstr &ClonedPhi = *It;

    // Work out which pair comes from the preheader (the initial value) and
    // which one is the loop-carried value.
    const bool InitComesFirst = ClonedPhi.getOperand(2).getMBB() == Preheader;
    const unsigned InitRegIdx = InitComesFirst ? 1 : 3;
    const unsigned LoopRegIdx = InitComesFirst ? 3 : 1;

    MachineInstr &OrigPhi = findEquivalentInstruction(ClonedPhi, Loop);
    assert(OrigPhi.isPHI());

    if (PeelFront) {
      // The cloned PHI keeps only its incoming value from the preheader. The
      // original PHI's initial value becomes the (renamed, if applicable)
      // loop-carried value of the clone.
      Register Carried = ClonedPhi.getOperand(LoopRegIdx).getReg();
      if (RegMap.count(Carried))
        Carried = RegMap[Carried];
      OrigPhi.getOperand(InitRegIdx).setReg(Carried);
      // Drop the (register, block) pair, higher index first.
      ClonedPhi.RemoveOperand(LoopRegIdx + 1);
      ClonedPhi.RemoveOperand(LoopRegIdx + 0);
    } else {
      // The cloned PHI takes the original loop's loop-carried value as its
      // only incoming value; its preheader pair is dropped, higher index
      // first.
      Register Carried = OrigPhi.getOperand(LoopRegIdx).getReg();
      ClonedPhi.getOperand(LoopRegIdx).setReg(Carried);
      ClonedPhi.RemoveOperand(InitRegIdx + 1);
      ClonedPhi.RemoveOperand(InitRegIdx + 0);
    }
  }

  DebugLoc DL;
  if (PeelFront) {
    // Preheader -> Peeled -> Loop.
    Preheader->replaceSuccessor(Loop, Peeled);
    Peeled->addSuccessor(Loop);
    Loop->replacePhiUsesWith(Preheader, Peeled);
    // Only re-insert a branch in the preheader if it had one to begin with.
    if (TII->removeBranch(*Preheader) > 0)
      TII->insertBranch(*Preheader, Peeled, nullptr, {}, DL);
    // The clone inherited the loop's branch; replace it with an
    // unconditional branch to the loop.
    TII->removeBranch(*Peeled);
    TII->insertBranch(*Peeled, Loop, nullptr, {}, DL);
    return Peeled;
  }

  // Loop -> Peeled -> Exit.
  Loop->replaceSuccessor(Exit, Peeled);
  Exit->replacePhiUsesWith(Loop, Peeled);
  Peeled->addSuccessor(Exit);

  // Re-create the loop's branch with every target that pointed at the exit
  // block redirected to the new block.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond);
  (void)CanAnalyzeBr; // Only read by the assert below.
  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
  TII->removeBranch(*Loop);
  MachineBasicBlock *NewTBB = TBB == Exit ? Peeled : TBB;
  MachineBasicBlock *NewFBB = FBB == Exit ? Peeled : FBB;
  TII->insertBranch(*Loop, NewTBB, NewFBB, Cond, DL);

  // Only re-insert a branch in the clone if it had one to begin with; it now
  // goes straight to the exit block.
  if (TII->removeBranch(*Peeled) > 0)
    TII->insertBranch(*Peeled, Exit, nullptr, {}, DL);
  return Peeled;
}
/// Report whether \p PhysReg is recorded as a live-in of any exit block of
/// \p Loop.
///
/// The exit blocks are obtained from MachineLoop::getExitBlocks and each one
/// is queried with MachineBasicBlock::isLiveIn.
///
/// \returns true as soon as one exit block lists \p PhysReg as live-in, false
///          if none does (including when the loop has no exit blocks).
bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) {
  SmallVector<MachineBasicBlock *, 4> Exits;
  Loop->getExitBlocks(Exits);
  for (unsigned Idx = 0, NumExits = Exits.size(); Idx != NumExits; ++Idx) {
    if (Exits[Idx]->isLiveIn(PhysReg))
      return true;
  }
  return false;
}