1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC] Use mtvsrdd to put callee-saved GPR into VSR

This patch exploits the mtvsrdd instruction (available in ISA 3.0+) to save
two callee-saved GPRs into a single VSR, making the prologue spill more
efficient.

Reviewed By: jsji, nemanjai

Differential Revision: https://reviews.llvm.org/D62565
This commit is contained in:
Qiu Chaofan 2021-04-20 16:32:24 +08:00
parent 12a1ce706d
commit ae1dd01644
3 changed files with 103 additions and 27 deletions

View File

@ -2248,30 +2248,39 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
BVCalleeSaved.set(CSRegs[i]);
for (unsigned Reg : BVAllocatable.set_bits()) {
// Set to 0 if the register is not a volatile VF/F8 register, or if it is
// Set to 0 if the register is not a volatile VSX register, or if it is
// used in the function.
if (BVCalleeSaved[Reg] ||
(!PPC::F8RCRegClass.contains(Reg) &&
!PPC::VFRCRegClass.contains(Reg)) ||
(MF.getRegInfo().isPhysRegUsed(Reg)))
if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
MF.getRegInfo().isPhysRegUsed(Reg))
BVAllocatable.reset(Reg);
}
bool AllSpilledToReg = true;
unsigned LastVSRUsedForSpill = 0;
for (auto &CS : CSI) {
if (BVAllocatable.none())
return false;
unsigned Reg = CS.getReg();
if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
if (!PPC::G8RCRegClass.contains(Reg)) {
AllSpilledToReg = false;
continue;
}
// For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
// into one VSR using the mtvsrdd instruction.
if (LastVSRUsedForSpill != 0) {
CS.setDstReg(LastVSRUsedForSpill);
BVAllocatable.reset(LastVSRUsedForSpill);
LastVSRUsedForSpill = 0;
continue;
}
unsigned VolatileVFReg = BVAllocatable.find_first();
if (VolatileVFReg < BVAllocatable.size()) {
CS.setDstReg(VolatileVFReg);
BVAllocatable.reset(VolatileVFReg);
LastVSRUsedForSpill = VolatileVFReg;
} else {
AllSpilledToReg = false;
}
@ -2290,6 +2299,24 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
BitVector Spilled(TRI->getNumRegs());
VSRContainingGPRs.clear();
// Map each VSR to the GPRs to be spilled into it. A single VSR can contain one
// or two GPRs, so we need a table to record this for later save/restore.
llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
if (Info.isSpilledToReg()) {
auto &SpilledVSR =
VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
assert(SpilledVSR.second == 0 &&
"Can't spill more than two GPRs into VSR!");
if (SpilledVSR.first == 0)
SpilledVSR.first = Info.getReg();
else
SpilledVSR.second = Info.getReg();
}
});
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
@ -2339,9 +2366,31 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
}
} else {
if (CSI[i].isSpilledToReg()) {
NumPESpillVSR++;
BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
.addReg(Reg, getKillRegState(true));
unsigned Dst = CSI[i].getDstReg();
if (Spilled[Dst])
continue;
if (VSRContainingGPRs[Dst].second != 0) {
assert(Subtarget.hasP9Vector() &&
"mtvsrdd is unavailable on pre-P9 targets.");
NumPESpillVSR += 2;
BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
.addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
.addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
} else if (VSRContainingGPRs[Dst].second == 0) {
assert(Subtarget.hasP8Vector() &&
"Can't move GPR to VSR on pre-P8 targets.");
++NumPESpillVSR;
BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
TRI->getSubReg(Dst, PPC::sub_64))
.addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
} else {
llvm_unreachable("More than two GPRs spilled to a VSR!");
}
Spilled.set(Dst);
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
// Use !IsLiveIn for the kill flag.
@ -2445,6 +2494,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
bool CR3Spilled = false;
bool CR4Spilled = false;
unsigned CSIIndex = 0;
BitVector Restored(TRI->getNumRegs());
// Initialize insertion-point logic; we will be restoring in reverse
// order of spill.
@ -2489,9 +2539,32 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
if (CSI[i].isSpilledToReg()) {
DebugLoc DL;
NumPEReloadVSR++;
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
.addReg(CSI[i].getDstReg(), getKillRegState(true));
unsigned Dst = CSI[i].getDstReg();
if (Restored[Dst])
continue;
if (VSRContainingGPRs[Dst].second != 0) {
assert(Subtarget.hasP9Vector());
NumPEReloadVSR += 2;
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
VSRContainingGPRs[Dst].second)
.addReg(Dst);
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
VSRContainingGPRs[Dst].first)
.addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
} else if (VSRContainingGPRs[Dst].second == 0) {
assert(Subtarget.hasP8Vector());
++NumPEReloadVSR;
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
VSRContainingGPRs[Dst].first)
.addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
} else {
llvm_unreachable("More than two GPRs spilled to a VSR!");
}
Restored.set(Dst);
} else {
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

View File

@ -28,6 +28,10 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned BasePointerSaveOffset;
const unsigned CRSaveOffset;
// Map each VSR used for spilling to the one or two GPRs stored in it.
// TODO: Use local table in methods to avoid this mutable member.
mutable DenseMap<unsigned, std::pair<Register, Register>> VSRContainingGPRs;
/**
* Find register[s] that can be used in function prologue and epilogue
*

View File

@ -14,15 +14,15 @@ body: |
$v20 = IMPLICIT_DEF
BLR8 implicit undef $lr8, implicit undef $rm
# Use mtvsrdd to save two GPRs in a single instruction
# CHECK-LABEL: name: test1BB
# CHECK: body: |
# CHECK: liveins: $x14, $x15, $x16, $v20
# CHECK: $f1 = MTVSRD killed $x14
# CHECK-NEXT: $f2 = MTVSRD killed $x15
# CHECK-NEXT: $f3 = MTVSRD killed $x16
# CHECK: $x16 = MFVSRD killed $f3
# CHECK-NEXT: $x15 = MFVSRD killed $f2
# CHECK-NEXT: $x14 = MFVSRD killed $f1
# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
# CHECK-NEXT: $vf1 = MTVSRD killed $x16
# CHECK: $x16 = MFVSRD killed $vf1
# CHECK-NEXT: $x15 = MFVSRLD $v0
# CHECK-NEXT: $x14 = MFVSRD killed $vf0
...
---
@ -55,15 +55,14 @@ body: |
## clobbered before restored in the epilogue.
# CHECK-LABEL: name: test2BB
# CHECK: body: |
# CHECK: $f0 = MTVSRD killed $x14
# CHECK-NEXT: $f1 = MTVSRD killed $x15
# CHECK-NEXT: $f2 = MTVSRD killed $x16
# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
# CHECK-NEXT: $vf1 = MTVSRD killed $x16
# CHECK: bb.2:
# CHECK-NEXT: successors: %bb.3
# CHECK-NEXT: liveins: $f0, $f1, $f2
# CHECK-NEXT: liveins: $v0, $v1
# CHECK: bb.3:
# CHECK-NEXT: liveins: $f0, $f1, $f2
# CHECK: $x16 = MFVSRD killed $f2
# CHECK-NEXT: $x15 = MFVSRD killed $f1
# CHECK-NEXT: $x14 = MFVSRD killed $f0
# CHECK-NEXT: liveins: $v0, $v1
# CHECK: $x16 = MFVSRD killed $vf1
# CHECK-NEXT: $x15 = MFVSRLD $v0
# CHECK-NEXT: $x14 = MFVSRD killed $vf0
...