mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
b47792a75b
Debug info is currently preventing VPT block creation, leading to different codegen. This patch attempts to skip any debug instructions during vpt block creation, making sure they do not interfere. Differential Revision: https://reviews.llvm.org/D103610
338 lines
11 KiB
C++
338 lines
11 KiB
C++
//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "ARM.h"
|
|
#include "ARMMachineFunctionInfo.h"
|
|
#include "ARMSubtarget.h"
|
|
#include "MCTargetDesc/ARMBaseInfo.h"
|
|
#include "Thumb2InstrInfo.h"
|
|
#include "llvm/ADT/SmallSet.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineInstrBundle.h"
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
#include "llvm/IR/DebugLoc.h"
|
|
#include "llvm/MC/MCInstrDesc.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include <cassert>
|
|
#include <new>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "arm-mve-vpt"
|
|
|
|
namespace {
|
|
class MVEVPTBlock : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
const Thumb2InstrInfo *TII;
|
|
const TargetRegisterInfo *TRI;
|
|
|
|
MVEVPTBlock() : MachineFunctionPass(ID) {}
|
|
|
|
bool runOnMachineFunction(MachineFunction &Fn) override;
|
|
|
|
MachineFunctionProperties getRequiredProperties() const override {
|
|
return MachineFunctionProperties().set(
|
|
MachineFunctionProperties::Property::NoVRegs);
|
|
}
|
|
|
|
StringRef getPassName() const override {
|
|
return "MVE VPT block insertion pass";
|
|
}
|
|
|
|
private:
|
|
bool InsertVPTBlocks(MachineBasicBlock &MBB);
|
|
};
|
|
|
|
char MVEVPTBlock::ID = 0;
|
|
|
|
} // end anonymous namespace
|
|
|
|
// Registers MVEVPTBlock through the INITIALIZE_PASS macro, passing DEBUG_TYPE
// ("arm-mve-vpt") and the description "ARM MVE VPT block pass".
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// "CFG only" and "is analysis" flags -- confirm against the macro definition.
INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
|
|
|
|
// Looks backwards from MI for a VCMP whose result can be folded into the
// VPT-block header that is about to be inserted in front of MI.
//
// Returns the VCMP to fold, with NewOpcode set to the matching VPT opcode, or
// nullptr when no suitable VCMP exists. NewOpcode is only meaningful when the
// result is non-null (it may have been overwritten with 0 otherwise).
static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
                                            const TargetRegisterInfo *TRI,
                                            unsigned &NewOpcode) {
  // Search backwards to the nearest instruction that defines or reads VPR.
  // This may or may not be a VCMP; that is checked after the loop. If the
  // search stops on an instruction that merely reads VPR, the opcode check
  // below rejects it and we return nullptr.
  //
  // Debug instructions are skipped: a debug use of VPR must not stop the
  // search, otherwise the presence of debug info would change whether the
  // VCMP gets folded.
  MachineBasicBlock::iterator CmpMI = MI;
  while (CmpMI != MI->getParent()->begin()) {
    --CmpMI;
    if (CmpMI->isDebugInstr())
      continue;
    if (CmpMI->modifiesRegister(ARM::VPR, TRI))
      break;
    if (CmpMI->readsRegister(ARM::VPR, TRI))
      break;
  }

  // MI is the first instruction of its block: there is nothing to fold.
  if (CmpMI == MI)
    return nullptr;

  // A zero opcode means the instruction we stopped on has no VPT equivalent.
  NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
  if (NewOpcode == 0)
    return nullptr;

  // Search forward from CmpMI to MI, checking if either compared register was
  // redefined in between; if so the comparison cannot be moved down to MI.
  if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
                             MI, TRI))
    return nullptr;
  if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
                             MI, TRI))
    return nullptr;
  return &*CmpMI;
}
|
|
|
|
// Advances Iter past a block of predicated instructions.
|
|
// Returns true if it successfully skipped the whole block of predicated
|
|
// instructions. Returns false when it stopped early (due to MaxSteps), or if
|
|
// Iter didn't point to a predicated instruction.
|
|
static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
|
|
MachineBasicBlock::instr_iterator EndIter,
|
|
unsigned MaxSteps,
|
|
unsigned &NumInstrsSteppedOver) {
|
|
ARMVCC::VPTCodes NextPred = ARMVCC::None;
|
|
Register PredReg;
|
|
NumInstrsSteppedOver = 0;
|
|
|
|
while (Iter != EndIter) {
|
|
if (Iter->isDebugInstr()) {
|
|
// Skip debug instructions
|
|
++Iter;
|
|
continue;
|
|
}
|
|
|
|
NextPred = getVPTInstrPredicate(*Iter, PredReg);
|
|
assert(NextPred != ARMVCC::Else &&
|
|
"VPT block pass does not expect Else preds");
|
|
if (NextPred == ARMVCC::None || MaxSteps == 0)
|
|
break;
|
|
--MaxSteps;
|
|
++Iter;
|
|
++NumInstrsSteppedOver;
|
|
};
|
|
|
|
return NumInstrsSteppedOver != 0 &&
|
|
(NextPred == ARMVCC::None || Iter == EndIter);
|
|
}
|
|
|
|
// Returns true if at least one instruction in the range [Iter, End) defines
|
|
// or kills VPR.
|
|
static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
|
|
MachineBasicBlock::iterator End) {
|
|
for (; Iter != End; ++Iter)
|
|
if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
|
|
static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) {
|
|
switch (BlockSize) {
|
|
case 1:
|
|
return ARM::PredBlockMask::T;
|
|
case 2:
|
|
return ARM::PredBlockMask::TT;
|
|
case 3:
|
|
return ARM::PredBlockMask::TTT;
|
|
case 4:
|
|
return ARM::PredBlockMask::TTTT;
|
|
default:
|
|
llvm_unreachable("Invalid BlockSize!");
|
|
}
|
|
}
|
|
|
|
// Given an iterator (Iter) that points at an instruction with a "Then"
|
|
// predicate, tries to create the largest block of continuous predicated
|
|
// instructions possible, and returns the VPT Block Mask of that block.
|
|
//
|
|
// This will try to perform some minor optimization in order to maximize the
|
|
// size of the block.
|
|
static ARM::PredBlockMask
|
|
CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
|
|
MachineBasicBlock::instr_iterator EndIter,
|
|
SmallVectorImpl<MachineInstr *> &DeadInstructions) {
|
|
MachineBasicBlock::instr_iterator BlockBeg = Iter;
|
|
(void)BlockBeg;
|
|
assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
|
|
"Expected a Predicated Instruction");
|
|
|
|
LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
|
|
|
|
unsigned BlockSize;
|
|
StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
|
|
|
|
LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
|
|
std::next(BlockBeg);
|
|
AddedInstIter != Iter; ++AddedInstIter) {
|
|
if (AddedInstIter->isDebugInstr())
|
|
continue;
|
|
dbgs() << " adding: ";
|
|
AddedInstIter->dump();
|
|
});
|
|
|
|
// Generate the initial BlockMask
|
|
ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize);
|
|
|
|
// Remove VPNOTs while there's still room in the block, so we can make the
|
|
// largest block possible.
|
|
ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
|
|
while (BlockSize < 4 && Iter != EndIter &&
|
|
Iter->getOpcode() == ARM::MVE_VPNOT) {
|
|
|
|
// Try to skip all of the predicated instructions after the VPNOT, stopping
|
|
// after (4 - BlockSize). If we can't skip them all, stop.
|
|
unsigned ElseInstCnt = 0;
|
|
MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
|
|
if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
|
|
ElseInstCnt))
|
|
break;
|
|
|
|
// Check if this VPNOT can be removed or not: It can only be removed if at
|
|
// least one of the predicated instruction that follows it kills or sets
|
|
// VPR.
|
|
if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
|
|
break;
|
|
|
|
LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
|
|
|
|
// Record the new size of the block
|
|
BlockSize += ElseInstCnt;
|
|
assert(BlockSize <= 4 && "Block is too large!");
|
|
|
|
// Record the VPNot to remove it later.
|
|
DeadInstructions.push_back(&*Iter);
|
|
++Iter;
|
|
|
|
// Replace the predicates of the instructions we're adding.
|
|
// Note that we are using "Iter" to iterate over the block so we can update
|
|
// it at the same time.
|
|
for (; Iter != VPNOTBlockEndIter; ++Iter) {
|
|
if (Iter->isDebugInstr())
|
|
continue;
|
|
|
|
// Find the register in which the predicate is
|
|
int OpIdx = findFirstVPTPredOperandIdx(*Iter);
|
|
assert(OpIdx != -1);
|
|
|
|
// Change the predicate and update the mask
|
|
Iter->getOperand(OpIdx).setImm(CurrentPredicate);
|
|
BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
|
|
|
|
LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
|
|
}
|
|
|
|
CurrentPredicate =
|
|
(CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
|
|
}
|
|
return BlockMask;
|
|
}
|
|
|
|
// Scans Block for VPT-predicated instructions and, for each run found, emits
// a block header in front of it (a VPT when a preceding VCMP can be folded
// in, a plain VPST otherwise) and bundles header and run together.
// Returns true if the block was changed.
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
  SmallVector<MachineInstr *, 4> DeadInstructions;
  bool Modified = false;

  MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
  MachineBasicBlock::instr_iterator EndIter = Block.instr_end();

  while (MBIter != EndIter) {
    MachineInstr *BlockHead = &*MBIter;
    Register PredReg;
    DebugLoc DL = BlockHead->getDebugLoc();

    const ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*BlockHead, PredReg);

    // None, Then and Else mirror the three mnemonic suffixes "", "t" and "e"
    // used in assembly, so code that was assembled or disassembled can show
    // any mix of them inside a long VPT block. Code generation, however, is
    // not expected to hand an Else-predicated instruction to this pass.
    assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");

    // Unpredicated instructions are left alone.
    if (Pred == ARMVCC::None) {
      ++MBIter;
      continue;
    }

    // Advances MBIter to just past the new block.
    const ARM::PredBlockMask BlockMask =
        CreateVPTBlock(MBIter, EndIter, DeadInstructions);
    LLVM_DEBUG(dbgs() << " final block mask: "
                      << static_cast<unsigned>(BlockMask) << "\n");

    // Emit the block header in front of the first predicated instruction:
    // fold a preceding VCMP into a VPT if one qualifies, else use a VPST.
    MachineInstrBuilder MIBuilder;
    unsigned NewOpcode;
    MachineInstr *VCMP = findVCMPToFoldIntoVPST(BlockHead, TRI, NewOpcode);
    if (!VCMP) {
      MIBuilder = BuildMI(Block, BlockHead, DL, TII->get(ARM::MVE_VPST))
                      .addImm(static_cast<uint64_t>(BlockMask));
    } else {
      LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
      MIBuilder = BuildMI(Block, BlockHead, DL, TII->get(NewOpcode))
                      .addImm(static_cast<uint64_t>(BlockMask))
                      .add(VCMP->getOperand(1))
                      .add(VCMP->getOperand(2))
                      .add(VCMP->getOperand(3));

      // The compared registers are now read at the new header, so any kill
      // flag on them between the old VCMP and the header is stale.
      const Register FirstCmpReg = VCMP->getOperand(1).getReg();
      const Register SecondCmpReg = VCMP->getOperand(2).getReg();
      for (MachineInstr &Between :
           make_range(VCMP->getIterator(), BlockHead->getIterator())) {
        Between.clearRegisterKills(FirstCmpReg, TRI);
        Between.clearRegisterKills(SecondCmpReg, TRI);
      }

      VCMP->eraseFromParent();
    }

    // Drop the absorbed VPNOTs before bundling so they do not end up inside
    // the bundle.
    for (MachineInstr *Dead : DeadInstructions)
      Dead->eraseFromParent();
    DeadInstructions.clear();

    // Bundle everything from the header up to (not including) MBIter.
    finalizeBundle(
        Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);

    Modified = true;
  }

  return Modified;
}
|
|
|
|
// Pass entry point. Does nothing unless the subtarget is Thumb2 with MVE
// integer ops; otherwise caches the instruction and register info and
// processes every basic block of Fn. Returns true if any block was changed.
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
  const auto &Subtarget = static_cast<const ARMSubtarget &>(Fn.getSubtarget());

  const bool HasMVE = Subtarget.isThumb2() && Subtarget.hasMVEIntegerOps();
  if (!HasMVE)
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(Subtarget.getInstrInfo());
  TRI = Subtarget.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  // Every block is visited, whether or not an earlier one was changed.
  bool AnyChange = false;
  for (MachineBasicBlock &MBB : Fn) {
    if (InsertVPTBlocks(MBB))
      AnyChange = true;
  }

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return AnyChange;
}
|
|
|
|
/// createMVEVPTBlock - Returns an instance of the MVE VPT block
|
|
/// insertion pass.
|
|
FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
|