mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[PowerPC] Move TOC save to prologue when profitable
The indirect call sequence on PPC requires that the TOC base register be saved prior to the indirect call and restored after the call, since the indirect call may branch to a global entry point in another DSO, which will update the TOC base.

Over the last couple of years, we have improved this to:
- be able to hoist TOC saves from loops (with changes to MachineLICM)
- avoid multiple saves when one dominates the other[s]

However, it is still possible to have multiple TOC saves dynamically in the execution path if there is no dominance relationship between them. This patch moves the TOC save to the prologue when one of the TOC saves is in a block that post-dominates entry (i.e. it cannot be avoided) or if it is in a block that is hotter than entry.

Differential revision: https://reviews.llvm.org/D63803

llvm-svn: 365232
This commit is contained in:
parent
eaaf38400c
commit
9a22014a44
@ -464,6 +464,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
|
||||
bool UseEstimate,
|
||||
unsigned *NewMaxCallFrameSize) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
|
||||
// Get the number of bytes to allocate from the FrameInfo
|
||||
unsigned FrameSize =
|
||||
@ -481,6 +482,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
|
||||
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
|
||||
!MFI.adjustsStack() && // No calls.
|
||||
!MustSaveLR(MF, LR) && // No need to save LR.
|
||||
!FI->mustSaveTOC() && // No need to save TOC.
|
||||
!RegInfo->hasBasePointer(MF); // No special alignment.
|
||||
|
||||
// Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
|
||||
@ -808,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// Check if the link register (LR) must be saved.
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
bool MustSaveLR = FI->mustSaveLR();
|
||||
bool MustSaveTOC = FI->mustSaveTOC();
|
||||
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
|
||||
bool MustSaveCR = !MustSaveCRs.empty();
|
||||
// Do we have a frame pointer and/or base pointer for this function?
|
||||
@ -819,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
unsigned BPReg = RegInfo->getBaseRegister(MF);
|
||||
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
|
||||
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
|
||||
unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
|
||||
unsigned ScratchReg = 0;
|
||||
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
|
||||
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
|
||||
@ -1092,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
HasSTUX = true;
|
||||
}
|
||||
|
||||
// Save the TOC register after the stack pointer update if a prologue TOC
|
||||
// save is required for the function.
|
||||
if (MustSaveTOC) {
|
||||
assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
|
||||
BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
|
||||
.addReg(TOCReg, getKillRegState(true))
|
||||
.addImm(TOCSaveOffset)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
|
||||
if (!HasRedZone) {
|
||||
assert(!isPPC64 && "A red zone is always available on PPC64");
|
||||
if (HasSTUX) {
|
||||
@ -1293,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
if (PPC::CRBITRCRegClass.contains(Reg))
|
||||
continue;
|
||||
|
||||
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
|
||||
continue;
|
||||
|
||||
// For SVR4, don't emit a move for the CR spill slot if we haven't
|
||||
// spilled CRs.
|
||||
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
|
||||
@ -1839,11 +1856,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
||||
unsigned MinFPR = PPC::F31;
|
||||
unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
|
||||
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
bool HasGPSaveArea = false;
|
||||
bool HasG8SaveArea = false;
|
||||
bool HasFPSaveArea = false;
|
||||
bool HasVRSAVESaveArea = false;
|
||||
bool HasVRSaveArea = false;
|
||||
bool MustSaveTOC = FI->mustSaveTOC();
|
||||
|
||||
SmallVector<CalleeSavedInfo, 18> GPRegs;
|
||||
SmallVector<CalleeSavedInfo, 18> G8Regs;
|
||||
@ -1852,6 +1871,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
||||
|
||||
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
|
||||
unsigned Reg = CSI[i].getReg();
|
||||
assert((!MustSaveTOC || (Reg != PPC::X2 && Reg != PPC::R2)) &&
|
||||
"Not expecting to try to spill R2 in a function that must save TOC");
|
||||
if (PPC::GPRCRegClass.contains(Reg) ||
|
||||
PPC::SPE4RCRegClass.contains(Reg)) {
|
||||
HasGPSaveArea = true;
|
||||
@ -2161,6 +2182,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
|
||||
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
|
||||
bool MustSaveTOC = FI->mustSaveTOC();
|
||||
DebugLoc DL;
|
||||
bool CRSpilled = false;
|
||||
MachineInstrBuilder CRMIB;
|
||||
@ -2191,6 +2214,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
continue;
|
||||
}
|
||||
|
||||
// The actual spill will happen in the prologue.
|
||||
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
|
||||
continue;
|
||||
|
||||
// Insert the spill to the stack frame.
|
||||
if (IsCRField) {
|
||||
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
|
||||
@ -2318,6 +2345,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
|
||||
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
|
||||
bool MustSaveTOC = FI->mustSaveTOC();
|
||||
bool CR2Spilled = false;
|
||||
bool CR3Spilled = false;
|
||||
bool CR4Spilled = false;
|
||||
@ -2340,6 +2369,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
|
||||
continue;
|
||||
|
||||
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
|
||||
continue;
|
||||
|
||||
if (Reg == PPC::CR2) {
|
||||
CR2Spilled = true;
|
||||
// The spill slot is associated only with CR2, which is the
|
||||
|
@ -21,9 +21,12 @@
|
||||
#include "PPC.h"
|
||||
#include "PPCInstrBuilder.h"
|
||||
#include "PPCInstrInfo.h"
|
||||
#include "PPCMachineFunctionInfo.h"
|
||||
#include "PPCTargetMachine.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -37,6 +40,7 @@ using namespace llvm;
|
||||
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
|
||||
STATISTIC(MultiTOCSaves,
|
||||
"Number of functions with multiple TOC saves that must be kept");
|
||||
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
|
||||
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
|
||||
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
|
||||
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
|
||||
@ -84,6 +88,9 @@ struct PPCMIPeephole : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
MachineDominatorTree *MDT;
|
||||
MachinePostDominatorTree *MPDT;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
uint64_t EntryFreq;
|
||||
|
||||
// Initialize class variables.
|
||||
void initialize(MachineFunction &MFParm);
|
||||
@ -102,7 +109,11 @@ public:
|
||||
|
||||
// Declare the analyses this pass depends on. MachineDominatorTree,
// MachinePostDominatorTree and MachineBlockFrequencyInfo are each registered
// as both required and preserved, after which the MachineFunctionPass base
// implementation is invoked to add its own entries.
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addRequired<MachinePostDominatorTree>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addPreserved<MachinePostDominatorTree>();
|
||||
AU.addPreserved<MachineBlockFrequencyInfo>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -120,6 +131,9 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) {
|
||||
MF = &MFParm;
|
||||
MRI = &MF->getRegInfo();
|
||||
MDT = &getAnalysis<MachineDominatorTree>();
|
||||
MPDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
|
||||
EntryFreq = MBFI->getEntryFreq();
|
||||
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
|
||||
LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
|
||||
LLVM_DEBUG(MF->dump());
|
||||
@ -200,6 +214,31 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
|
||||
void PPCMIPeephole::UpdateTOCSaves(
|
||||
std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
|
||||
assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
|
||||
assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
|
||||
"TOC-save removal only supported on ELFv2");
|
||||
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
|
||||
MachineBasicBlock *Entry = &MF->front();
|
||||
uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
|
||||
|
||||
// If the block in which the TOC save resides is in a block that
|
||||
// post-dominates Entry, or a block that is hotter than entry (keep in mind
|
||||
// that early MachineLICM has already run so the TOC save won't be hoisted)
|
||||
// we can just do the save in the prologue.
|
||||
if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
|
||||
FI->setMustSaveTOC(true);
|
||||
|
||||
// If we are saving the TOC in the prologue, all the TOC saves can be removed
|
||||
// from the code.
|
||||
if (FI->mustSaveTOC()) {
|
||||
for (auto &TOCSave : TOCSaves)
|
||||
TOCSave.second = false;
|
||||
// Add new instruction to map.
|
||||
TOCSaves[MI] = false;
|
||||
return;
|
||||
}
|
||||
|
||||
bool Keep = true;
|
||||
for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
|
||||
MachineInstr *CurrInst = It->first;
|
||||
@ -777,6 +816,10 @@ bool PPCMIPeephole::simplifyCode(void) {
|
||||
|
||||
// Eliminate all the TOC save instructions which are redundant.
|
||||
Simplified |= eliminateRedundantTOCSaves(TOCSaves);
|
||||
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
|
||||
if (FI->mustSaveTOC())
|
||||
NumTOCSavesInPrologue++;
|
||||
|
||||
// We try to eliminate redundant compare instruction.
|
||||
Simplified |= eliminateRedundantCompare();
|
||||
|
||||
@ -1341,6 +1384,9 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
|
||||
|
||||
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
|
||||
"PowerPC MI Peephole Optimization", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
|
||||
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
|
||||
"PowerPC MI Peephole Optimization", false, false)
|
||||
|
||||
|
@ -44,6 +44,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
|
||||
/// PEI.
|
||||
bool MustSaveLR;
|
||||
|
||||
/// MustSaveTOC - Indicates that the TOC save needs to be performed in the
|
||||
/// prologue of the function. This is typically the case when there are
|
||||
/// indirect calls in the function and it is more profitable to save the
|
||||
/// TOC pointer in the prologue than in the block(s) containing the call(s).
|
||||
bool MustSaveTOC = false;
|
||||
|
||||
/// Do we have to disable shrink-wrapping? This has to be set if we emit any
|
||||
/// instructions that clobber LR in the entry block because discovering this
|
||||
/// in PEI is too late (happens after shrink-wrapping);
|
||||
@ -151,6 +157,9 @@ public:
|
||||
/// Set the MustSaveLR flag to \p U.
void setMustSaveLR(bool U) { MustSaveLR = U; }
|
||||
/// Return the current value of the MustSaveLR flag.
bool mustSaveLR() const { return MustSaveLR; }
|
||||
|
||||
/// Set the MustSaveTOC flag to \p U, i.e. record whether the TOC save needs
/// to be performed in the prologue of the function.
void setMustSaveTOC(bool U) { MustSaveTOC = U; }
|
||||
/// Return the current value of the MustSaveTOC flag (true when the TOC save
/// needs to be performed in the prologue of the function).
bool mustSaveTOC() const { return MustSaveTOC; }
|
||||
|
||||
/// We certainly don't want to shrink wrap functions if we've emitted a
|
||||
/// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
|
||||
/// has to go into the entry block.
|
||||
|
@ -21,9 +21,9 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
|
||||
; CHECK-NEXT: std 30, -16(1)
|
||||
; CHECK-NEXT: std 0, 16(1)
|
||||
; CHECK-NEXT: stdu 1, -48(1)
|
||||
; CHECK-NEXT: ld 12, 0(3)
|
||||
; CHECK-NEXT: mr 30, 3
|
||||
; CHECK-NEXT: std 2, 24(1)
|
||||
; CHECK-NEXT: mr 30, 3
|
||||
; CHECK-NEXT: ld 12, 0(3)
|
||||
; CHECK-NEXT: mtctr 12
|
||||
; CHECK-NEXT: bctrl
|
||||
; CHECK-NEXT: ld 2, 24(1)
|
||||
|
@ -37,7 +37,6 @@ if.end: ; preds = %entry, %if.then
|
||||
define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: std 2, 24(1)
|
||||
; CHECK: std 2, 24(1)
|
||||
; CHECK-NOT: std 2, 24(1)
|
||||
entry:
|
||||
%tobool = icmp eq i32 %i, 0
|
||||
@ -86,7 +85,6 @@ if.end: ; preds = %if.else, %if.then
|
||||
define signext i32 @test5(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture readnone %Func2) {
|
||||
entry:
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: std 2, 24(1)
|
||||
; CHECK: std 2, 24(1)
|
||||
|
||||
%tobool = icmp eq i32 %i, 0
|
||||
|
@ -19,13 +19,12 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
|
||||
; CHECK-NEXT: cmpwi cr1, r4, 11
|
||||
; CHECK-NEXT: mr r30, r3
|
||||
; CHECK-NEXT: extsw r28, r4
|
||||
; CHECK-NEXT: std r2, 24(r1)
|
||||
; CHECK-NEXT: cmpwi r29, 1
|
||||
; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt
|
||||
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_3
|
||||
; CHECK-NEXT: # %bb.1: # %for.body.us.preheader
|
||||
; CHECK-NEXT: std r2, 24(r1)
|
||||
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2
|
||||
; CHECK-NEXT: .p2align 5
|
||||
; CHECK-NEXT: .LBB0_2: # %for.body.us
|
||||
; CHECK-NEXT: .LBB0_1: # %for.body.us
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: mtctr r30
|
||||
; CHECK-NEXT: mr r3, r28
|
||||
@ -34,12 +33,11 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
|
||||
; CHECK-NEXT: ld 2, 24(r1)
|
||||
; CHECK-NEXT: addi r29, r29, -1
|
||||
; CHECK-NEXT: cmplwi r29, 0
|
||||
; CHECK-NEXT: bne cr0, .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
|
||||
; CHECK-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
|
||||
; CHECK-NEXT: mtctr r30
|
||||
; CHECK-NEXT: mr r3, r28
|
||||
; CHECK-NEXT: mr r12, r30
|
||||
; CHECK-NEXT: std r2, 24(r1)
|
||||
; CHECK-NEXT: bctrl
|
||||
; CHECK-NEXT: ld 2, 24(r1)
|
||||
; CHECK-NEXT: addi r1, r1, 64
|
||||
|
Loading…
x
Reference in New Issue
Block a user