1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[MIR-Canon] Adding support for local idempotent instruction hoisting.

llvm-svn: 328915
This commit is contained in:
Puyan Lotfi 2018-03-31 05:48:51 +00:00
parent 08af09d478
commit b93beb070b
2 changed files with 232 additions and 5 deletions

View File

@ -131,7 +131,43 @@ static unsigned GetDummyVReg(const MachineFunction &MF) {
return ~0U;
}
static bool rescheduleCanonically(MachineBasicBlock *MBB) {
static bool
rescheduleLexographically(std::vector<MachineInstr *> instructions,
MachineBasicBlock *MBB,
std::function<MachineBasicBlock::iterator()> getPos) {
bool Changed = false;
std::map<std::string, MachineInstr*> StringInstrMap;
for (auto *II : instructions) {
std::string S;
raw_string_ostream OS(S);
II->print(OS);
OS.flush();
// Trim the assignment, or start from the begining in the case of a store.
const size_t i = S.find("=");
StringInstrMap.insert({(i == std::string::npos) ? S : S.substr(i), II});
}
for (auto &II : StringInstrMap) {
DEBUG({
dbgs() << "Splicing ";
II.second->dump();
dbgs() << " right before: ";
getPos()->dump();
});
Changed = true;
MBB->splice(getPos(), MBB, II.second);
}
return Changed;
}
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
MachineBasicBlock *MBB) {
bool Changed = false;
@ -153,13 +189,59 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
Instructions.push_back(&MI);
}
std::vector<MachineInstr *> PseudoIdempotentInstructions;
std::vector<unsigned> PhysRegDefs;
for (auto *II : Instructions) {
for (unsigned i = 1; i < II->getNumOperands(); i++) {
MachineOperand &MO = II->getOperand(i);
if (!MO.isReg())
continue;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
continue;
PhysRegDefs.push_back(MO.getReg());
}
}
for (auto *II : Instructions) {
if (II->getNumOperands() == 0)
continue;
if (II->mayLoadOrStore())
continue;
MachineOperand &MO = II->getOperand(0);
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
continue;
bool IsPseudoIdempotent = true;
for (unsigned i = 1; i < II->getNumOperands(); i++) {
if (II->getOperand(i).isImm()) {
continue;
}
if (II->getOperand(i).isReg()) {
if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
PhysRegDefs.end()) {
continue;
}
}
IsPseudoIdempotent = false;
break;
}
if (IsPseudoIdempotent) {
PseudoIdempotentInstructions.push_back(II);
continue;
}
DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
@ -194,9 +276,6 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
if (DefI != BBE && UseI != BBE)
break;
if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
continue;
if (&*BBI == Def) {
DefI = BBI;
continue;
@ -222,6 +301,12 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
MBB->splice(UseI, MBB, DefI);
}
PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
Changed |= rescheduleLexographically(
PseudoIdempotentInstructions, MBB,
[&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
return Changed;
}
@ -517,7 +602,8 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
Changed |= rescheduleCanonically(MBB);
unsigned IdempotentInstCount = 0;
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
@ -579,6 +665,31 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
// Here we renumber the def vregs for the idempotent instructions from the top
// of the MachineBasicBlock so that they are named in the order that we sorted
// them alphabetically. Eventually we won't need SkipVRegs because we will use
// named vregs instead.
unsigned gap = 1;
SkipVRegs(gap, MRI, DummyRC);
auto MII = MBB->begin();
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
MachineInstr &MI = *MII++;
Changed = true;
unsigned vRegToRename = MI.getOperand(0).getReg();
auto Rename = MRI.createVirtualRegister(MRI.getRegClass(vRegToRename));
std::vector<MachineOperand *> RenameMOs;
for (auto &MO : MRI.reg_operands(vRegToRename)) {
RenameMOs.push_back(&MO);
}
for (auto *MO : RenameMOs) {
MO->setReg(Rename);
}
}
Changed |= doDefKillClear(MBB);
DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);

View File

@ -0,0 +1,116 @@
# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -run-pass mir-canonicalizer %s | FileCheck %s
# These idempotent instructions are sorted alphabetically (based on the text after the '=')
# CHECK: %4353:gpr64 = MOVi64imm 4617315517961601024
# CHECK: %4354:gpr32 = MOVi32imm 408
# CHECK: %4355:gpr64all = IMPLICIT_DEF
# CHECK: %4356:fpr64 = FMOVDi 20
# CHECK: %4357:fpr64 = FMOVDi 112
...
---
name: Proc8
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4, di-variable: '', di-expression: '', di-location: '' }
- { id: 1, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -16, di-variable: '', di-expression: '', di-location: '' }
- { id: 2, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -24, di-variable: '', di-expression: '', di-location: '' }
- { id: 3, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -32, di-variable: '', di-expression: '', di-location: '' }
- { id: 4, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -40, di-variable: '', di-expression: '', di-location: '' }
- { id: 5, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -48, di-variable: '', di-expression: '', di-location: '' }
- { id: 6, type: default, offset: 0, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -56, di-variable: '', di-expression: '', di-location: '' }
constants:
body: |
bb.0:
liveins: $x0, $x1, $d0, $d1
%3:fpr64 = COPY $d1
%2:fpr64 = COPY $d0
%1:gpr64 = COPY $x1
%0:gpr64common = COPY $x0
STRXui %0, %stack.1, 0 :: (store 8)
STRXui %1, %stack.2, 0 :: (store 8)
STRDui %2, %stack.3, 0 :: (store 8)
STRDui %3, %stack.4, 0 :: (store 8)
%4:fpr64 = FMOVDi 20
%5:fpr64 = FADDDrr %2, killed %4
STRDui %5, %stack.5, 0 :: (store 8)
%6:gpr32 = FCVTZSUWDr %5
STRDroW %3, %0, killed %6, 1, 1
%7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
%8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
%9:gpr32common = FCVTZSUWDr killed %8
%10:fpr64 = LDRDroW %7, %9, 1, 1
%11:gpr32common = ADDWri %9, 1, 0
STRDroW killed %10, %7, killed %11, 1, 1
%12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
%13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
%14:gpr32common = FCVTZSUWDr %12
%15:gpr32common = ADDWri killed %14, 30, 0
STRDroW %12, killed %13, killed %15, 1, 1
%16:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
STRDui killed %16, %stack.6, 0 :: (store 8)
%19:fpr64 = FMOVDi 112
%46:gpr32 = MOVi32imm 408
%43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
%44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
%45:gpr32 = FCVTZSUWDr %43
%47:gpr64common = SMADDLrrr killed %45, %46, killed %44
%48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load 8)
%49:gpr32 = FCVTZSUWDr killed %48
STRDroW %43, killed %47, killed %49, 1, 1
%21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
%22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
%23:gpr32 = FCVTZSUWDr killed %22
%24:gpr32 = MOVi32imm 408
%25:gpr64common = SMADDLrrr %23, %24, killed %21
%26:gpr64sp = ADDXrx killed %25, %23, 51
%27:fpr64 = LDURDi %26, -8
%29:fpr64 = FADDDrr killed %27, %19
STURDi killed %29, %26, -8
%30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
%31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
%32:gpr32common = FCVTZSUWDr killed %31
%34:gpr64all = IMPLICIT_DEF
%33:gpr64 = INSERT_SUBREG %34, %32, %subreg.sub_32
%35:gpr64 = SBFMXri killed %33, 61, 31
%36:fpr64 = LDRDroX killed %30, %35, 0, 0
%37:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
%38:gpr32common = ADDWri %32, 20, 0
%39:gpr64common = SMADDLrrr killed %38, %24, killed %37
STRDroX killed %36, killed %39, %35, 0, 0
%40:gpr64 = MOVi64imm 4617315517961601024
%42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8)
$w0 = COPY %42
RET_ReallyLR implicit $w0
...