mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
032b2a6c3d
https://reviews.llvm.org/D70210 Previously: Due to the sensitivity of the algorithm to gaps and extra instructions, when diffing we often see the naming being off by a few. This makes the diff unreadable even for tests with 7 and 8 instructions respectively. Naming can change depending on candidates (and the order of picking candidates). Suddenly, if there's one extra instruction somewhere, the entire subtree would be named completely differently. There is no consistent naming of similar instructions which occur in different functions. If we try to do something like count the frequency distribution of various differences across a suite, then the above sensitivity issues are going to produce poor results. Instead: Name each instruction based on the semantics of the instruction (a hash of the opcode and operands). Essentially, a given instruction that occurs in any module/function will be named similarly (i.e. semantically). This has some nice properties: We can easily look at many instructions and just check the hash, and if they're named similarly, then it's the same instruction. It makes it very easy to spot the same instruction occurring multiple times, as well as across many functions (useful for frequency distribution). It is independent of traversal/candidates/depth of graph. No need to keep track of last index/gaps/skip count etc. No off-by-a-few issues with diffs. I've tried the old vs new implementation on files ranging from 30 to 700 instructions. In both cases with the old algorithm, the diffs are a sea of red, whereas for the semantic version, in both cases, the diffs line up beautifully. Simplified implementation of the main loop (simple iteration), with no need to keep track of what has and has not been visited. Collisions are handled just by incrementing a counter. Roughly bb[N]_hash_[CollisionCount]. 
Additionally with the new implementation, we can probably avoid doing the hoisting of instructions to various places, as they'll likely be named the same resulting in differences only based on collision (ie regardless of whether the instruction is hoisted or not/close to use or not, it'll be named the same hash which should result in use of the instruction be identical with the only change being the collision count) which is very easy to spot visually.
146 lines
4.8 KiB
C++
146 lines
4.8 KiB
C++
//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "MIRVRegNamerUtils.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "mir-vregnamer-utils"
|
|
|
|
bool VRegRenamer::doVRegRenaming(
|
|
const std::map<unsigned, unsigned> &VRegRenameMap) {
|
|
bool Changed = false;
|
|
for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
|
|
|
|
auto VReg = I->first;
|
|
auto Rename = I->second;
|
|
|
|
std::vector<MachineOperand *> RenameMOs;
|
|
for (auto &MO : MRI.reg_operands(VReg)) {
|
|
RenameMOs.push_back(&MO);
|
|
}
|
|
|
|
for (auto *MO : RenameMOs) {
|
|
Changed = true;
|
|
MO->setReg(Rename);
|
|
|
|
if (!MO->isDef())
|
|
MO->setIsKill(false);
|
|
}
|
|
}
|
|
|
|
return Changed;
|
|
}
|
|
|
|
std::map<unsigned, unsigned>
|
|
VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
|
|
std::map<unsigned, unsigned> VRegRenameMap;
|
|
|
|
std::map<std::string, unsigned> VRegNameCollisionMap;
|
|
|
|
auto GetUniqueVRegName =
|
|
[&VRegNameCollisionMap](const NamedVReg &Reg) -> std::string {
|
|
auto It = VRegNameCollisionMap.find(Reg.getName());
|
|
unsigned Counter = 0;
|
|
if (It != VRegNameCollisionMap.end()) {
|
|
Counter = It->second;
|
|
}
|
|
++Counter;
|
|
VRegNameCollisionMap[Reg.getName()] = Counter;
|
|
return Reg.getName() + "__" + std::to_string(Counter);
|
|
};
|
|
|
|
for (auto &Vreg : VRegs) {
|
|
auto Reg = Vreg.getReg();
|
|
assert(Register::isVirtualRegister(Reg) &&
|
|
"Expecting Virtual Registers Only");
|
|
auto NewNameForReg = GetUniqueVRegName(Vreg);
|
|
auto Rename = createVirtualRegisterWithName(Reg, NewNameForReg);
|
|
|
|
VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
|
|
}
|
|
return VRegRenameMap;
|
|
}
|
|
|
|
/// Compute a short textual hash describing \p MI.
///
/// The hash is built from the instruction's opcode followed by one value per
/// operand in MI.uses():
///  - immediates contribute their value (truncated to unsigned),
///  - target indices contribute their offset combined with the target flags,
///  - virtual registers contribute the opcode of the instruction that MRI
///    reports as their definition (0 if there is none),
///  - other registers contribute the register number,
///  - every other operand kind contributes 0.
///
/// \returns at most the first five characters of the decimal rendering of the
/// combined hash.
std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
  auto HashOperand = [this](const MachineOperand &MO) -> unsigned {
    if (MO.isImm())
      return MO.getImm();
    if (MO.isTargetIndex())
      return MO.getOffset() | (MO.getTargetFlags() << 16);
    if (MO.isReg()) {
      if (!Register::isVirtualRegister(MO.getReg()))
        return static_cast<unsigned>(MO.getReg());
      // getVRegDef() yields null for a virtual register without a defining
      // instruction; fall back to the common value rather than dereferencing
      // a null pointer.
      if (const MachineInstr *Def = MRI.getVRegDef(MO.getReg()))
        return Def->getOpcode();
      return 0;
    }
    // We could explicitly handle all the types of the MachineOperand here,
    // but we can just return a common number until we find a compelling test
    // case where this is bad. The only side effect here is contributing to a
    // hash collision, but there's enough information (opcodes, other
    // registers, etc.) that this will likely not be a problem.
    return 0;
  };

  SmallVector<unsigned, 16> MIOperands;
  MIOperands.push_back(MI.getOpcode());
  for (auto &Op : MI.uses())
    MIOperands.push_back(HashOperand(Op));

  auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
  // Only a short prefix of the hash is kept for the register name.
  return std::to_string(HashMI).substr(0, 5);
}
|
|
|
|
unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
|
|
return createVirtualRegisterWithName(
|
|
VReg, getInstructionOpcodeHash(*MRI.getVRegDef(VReg)));
|
|
}
|
|
|
|
/// Rename the virtual registers found in operand 0 of the instructions in
/// \p MBB.
///
/// Each candidate register is given the name "bb<N>_<hash>", where <N> is the
/// value of getCurrentBBNumber() and <hash> comes from
/// getInstructionOpcodeHash(); getVRegRenameMap() then makes the names unique.
///
/// \returns the result of doVRegRenaming(), or false when the block has no
/// candidate instruction.
bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
  const std::string BlockPrefix =
      "bb" + std::to_string(getCurrentBBNumber()) + "_";

  std::vector<NamedVReg> Renamable;
  for (MachineInstr &MI : *MBB) {
    // Stores and branches are never renamed; neither are instructions that
    // have no operands at all.
    const bool IsStoreOrBranch = MI.mayStore() || MI.isBranch();
    if (IsStoreOrBranch || MI.getNumOperands() == 0)
      continue;

    // Only operand 0 is inspected; it must be a virtual register (this skips
    // non-register operands and physical registers).
    MachineOperand &FirstOp = MI.getOperand(0);
    const bool IsVRegOperand =
        FirstOp.isReg() && Register::isVirtualRegister(FirstOp.getReg());
    if (!IsVRegOperand)
      continue;

    Renamable.push_back(
        NamedVReg(FirstOp.getReg(), BlockPrefix + getInstructionOpcodeHash(MI)));
  }

  // Nothing collected means nothing to remap.
  if (Renamable.empty())
    return false;

  return doVRegRenaming(getVRegRenameMap(Renamable));
}
|
|
|
|
/// Entry point: rename the virtual registers of \p MBB, using \p BBNum as the
/// block number for this run.
///
/// \returns whatever renameInstsInMBB() reports for the block.
bool VRegRenamer::renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) {
  // Store the block number before renaming starts.
  // NOTE(review): presumably this is the value getCurrentBBNumber() returns
  // when renameInstsInMBB() builds its name prefix -- confirm in the header.
  CurrentBBNumber = BBNum;

  const bool Changed = renameInstsInMBB(MBB);
  return Changed;
}
|
|
|
|
unsigned VRegRenamer::createVirtualRegisterWithName(unsigned VReg,
|
|
const std::string &Name) {
|
|
std::string Temp(Name);
|
|
std::transform(Temp.begin(), Temp.end(), Temp.begin(), ::tolower);
|
|
if (auto RC = MRI.getRegClassOrNull(VReg))
|
|
return MRI.createVirtualRegister(RC, Temp);
|
|
return MRI.createGenericVirtualRegister(MRI.getType(VReg), Name);
|
|
}
|