Revert "[IPRA][ARM] Spill extra registers at -Oz"
Reverting because this is causing failures on bots with expensive checks enabled. This reverts commit 73cea83a6f5ab521edf3cccfc603534776d691ec.
This commit is contained in:
parent de0c111de7
commit 30718af145
@@ -91,8 +91,6 @@ private:
/// all registers that were disabled are removed from the list.
SmallVector<MCPhysReg, 16> UpdatedCSRs;

void initUpdatedCSRs();

/// RegAllocHints - This vector records register allocation hints for
/// virtual registers. For each virtual register, it keeps a pair of hint
/// type and hints vector making up the allocation hints. Only the first
@@ -233,17 +231,12 @@ public:

/// Disables the register from the list of CSRs.
/// I.e. the register will not appear as part of the CSR mask.
/// \see UpdatedCSRs.
void disableCalleeSavedRegister(Register Reg);

/// Enables the register from the list of CSRs.
/// I.e. the register will appear as part of the CSR mask.
/// \see UpdatedCSRs.
void enableCalleeSavedRegister(Register Reg);
/// \see UpdatedCalleeSavedRegs.
void disableCalleeSavedRegister(unsigned Reg);

/// Returns list of callee saved registers.
/// The function returns the updated CSR list (after taking into account
/// registers that are enabled/disabled from the CSR list).
/// registers that are disabled from the CSR list).
const MCPhysReg *getCalleeSavedRegs() const;

/// Sets the updated Callee Saved Registers list.
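The doc comments above describe the per-function CSR override API that this revert touches. As a minimal sketch (not part of the commit, and assuming the pre-revert Register-based overloads are available), a backend hook might use it like this; the helper names markExtraCalleeSave and dropCalleeSave are hypothetical:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Mark ExtraReg as voluntarily callee-saved for this function, so the updated
// CSR list returned by getCalleeSavedRegs() will contain it.
static void markExtraCalleeSave(MachineFunction &MF, Register ExtraReg) {
  MF.getRegInfo().enableCalleeSavedRegister(ExtraReg);
}

// Drop DisabledReg from the CSR list, so it no longer appears as part of the
// CSR mask for this function.
static void dropCalleeSave(MachineFunction &MF, Register DisabledReg) {
  MF.getRegInfo().disableCalleeSavedRegister(DisabledReg);
}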
@@ -610,54 +610,30 @@ bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const {
return false;
}

void MachineRegisterInfo::initUpdatedCSRs() {
if (IsUpdatedCSRsInitialized)
return;
void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {

const TargetRegisterInfo *TRI = getTargetRegisterInfo();
const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
for (const MCPhysReg *I = CSR; *I; ++I)
UpdatedCSRs.push_back(*I);

// Zero value represents the end of the register list
// (no more registers should be pushed).
UpdatedCSRs.push_back(0);

IsUpdatedCSRsInitialized = true;
}

void MachineRegisterInfo::disableCalleeSavedRegister(Register Reg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
assert(Reg && (Reg < TRI->getNumRegs()) &&
"Trying to disable an invalid register");

initUpdatedCSRs();
if (!IsUpdatedCSRsInitialized) {
const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
for (const MCPhysReg *I = CSR; *I; ++I)
UpdatedCSRs.push_back(*I);

// Remove the register (and its aliases) from the CSR list.
// Zero value represents the end of the register list
// (no more registers should be pushed).
UpdatedCSRs.push_back(0);

IsUpdatedCSRsInitialized = true;
}

// Remove the register (and its aliases from the list).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
UpdatedCSRs.end());
}

void MachineRegisterInfo::enableCalleeSavedRegister(Register Reg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
assert(Reg && (Reg < TRI->getNumRegs()) &&
"Trying to disable an invalid register");

initUpdatedCSRs();

// Remove the null terminator from the end of the list.
assert(UpdatedCSRs.back() == 0);
UpdatedCSRs.pop_back();

// Add the register (and its sub-registers) to the CSR list.
for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI)
UpdatedCSRs.push_back(*SRI);

// Put the null terminator back.
UpdatedCSRs.push_back(0);
}

const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
if (IsUpdatedCSRsInitialized)
return UpdatedCSRs.data();
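For reference, a small sketch (an assumption based on the null-terminated list convention shown in the code above, not part of the commit) of how a consumer might walk the list returned by getCalleeSavedRegs(); the helper name countUpdatedCSRs is hypothetical:

#include "llvm/CodeGen/MachineRegisterInfo.h"

// Count the registers in the (zero-terminated) updated CSR list.
static unsigned countUpdatedCSRs(const llvm::MachineRegisterInfo &MRI) {
  unsigned Count = 0;
  for (const llvm::MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I)
    ++Count;
  return Count;
}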
@@ -451,8 +451,6 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
}

LLVM_DEBUG(dbgs() << "Assigned " << RegInfo->getName(Reg)
<< " to spill slot " << FrameIdx << "\n");
CS.setFrameIdx(FrameIdx);
}
}
@@ -71,14 +71,6 @@ static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
cl::desc("Align ARM NEON spills in prolog and epilog"));

static cl::opt<bool> EnableExtraSpills(
"arm-extra-spills", cl::Hidden, cl::init(false),
cl::desc("Preserve extra registers when useful for IPRA"));

// Testing option to bypass some profitability checks.
static cl::opt<bool> ForceExtraSpills("arm-extra-spills-force", cl::Hidden,
cl::init(false));

static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
@@ -1625,251 +1617,6 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
SavedRegs.set(ARM::R4);
}

// Compute the set of registers which cannot be preserved, because they are
// either modified outside the PUSH/POP instructions, or are live at the point
// where the POP will be inserted. This only considers r0-r3, which are
// currently the only registers we voluntarily save when the PCS doesn't
// require it.
void ARMFrameLowering::findRegDefsOutsideSaveRestore(
MachineFunction &MF, BitVector &UnsaveableRegs) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();

SmallSet<MachineBasicBlock *, 2> SaveBlocks;
SmallSet<MachineBasicBlock *, 2> RestoreBlocks;

if (MFI.getSavePoint()) {
SaveBlocks.insert(MFI.getSavePoint());
RestoreBlocks.insert(MFI.getRestorePoint());
} else {
SaveBlocks.insert(&MF.front());
for (MachineBasicBlock &MBB : MF)
if (MBB.isReturnBlock())
RestoreBlocks.insert(&MBB);
}

// Walk blocks from the function entry and exits (following control flow both
// ways), stopping when we get to a save/restore block. Check for
// instructions which modify any of the registers we care about.
SmallVector<MachineBasicBlock *, 4> WorkList;
SmallSet<MachineBasicBlock *, 4> VisitedBlocks;
LLVM_DEBUG(dbgs() << "Entry block: " << MF.front().getName() << "\n");
WorkList.push_back(&MF.front());
for (MachineBasicBlock &MBB : MF) {
if (MBB.isReturnBlock()) {
LLVM_DEBUG(dbgs() << "Return block: " << MBB.getName() << "\n");
WorkList.push_back(&MBB);
}
}

auto CheckOutsideInst = [&UnsaveableRegs, TRI](MachineInstr &MI) {
for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
if (MI.modifiesRegister(Reg, TRI)) {
UnsaveableRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Register " << TRI->getName(Reg)
<< " modified by instruction " << MI << "\n");
}
}
};

while (!WorkList.empty()) {
MachineBasicBlock *MBB = WorkList.pop_back_val();

if (VisitedBlocks.count(MBB))
continue;
VisitedBlocks.insert(MBB);

bool IsSave = SaveBlocks.count(MBB);
bool IsRestore = RestoreBlocks.count(MBB);

LLVM_DEBUG(dbgs() << "Visiting block " << MBB->getName() << ", IsSave="
<< IsSave << ", IsRestore=" << IsRestore << "\n");

// If this is a restore block, the POP instruction will be inserted just
// before the terminator, so we need to consider any terminator
// instructions to be outside the preserved region. We also need to check
// for registers which are live at the POP insertion point, because these
// can't be restored without changing their value.
if (IsRestore) {
LivePhysRegs LPR(*TRI);
LPR.addLiveOuts(*MBB);
for (auto &Term : reverse(MBB->terminators())) {
LPR.stepBackward(Term);
CheckOutsideInst(Term);
}

for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
if (LPR.contains(Reg)) {
UnsaveableRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Register " << TRI->getName(Reg)
<< " live-out of restore block " << MBB->getName()
<< "\n");
}
}
}

// If this block is completely outside the save/restore region, then any
// modified registers can't be preserved. A save block counts as being
// inside the saved region, with the possible exception of the last few
// instructions if it's also a restore block, handled above. We don't visit
// blocks which are completely inside the saved region and don't have any
// save/restore instructions, so don't need to check that here.
if (!IsSave && !IsRestore)
for (auto &MI : *MBB)
CheckOutsideInst(MI);

// Walk the control flow graph in both directions, except for blocks which
// are inside the PUSH/POP region.
if (IsSave || !IsRestore)
for (auto Pred : MBB->predecessors())
WorkList.push_back(Pred);
if (!IsSave || IsRestore)
for (auto Succ : MBB->successors())
WorkList.push_back(Succ);
}
}

bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
// Shrink wrapping is detrimental to code size because it prevents merging
// the CSR restore and function return into one POP instruction. It also
// conflicts with saving extra registers for IPRA, because it makes more
// registers live at the PUSH/POP.
if (MF.getFunction().hasMinSize())
return false;

return true;
}

// When doing inter-procedural register allocation, saving extra registers in
// [r0,r3] will allow us to keep live values in them in any callers. The extra
// saves and restores don't cost us any code-size if we are already emitting
// PUSH and POP instructions.
unsigned ARMFrameLowering::spillExtraRegsForIPRA(MachineFunction &MF,
BitVector &SavedRegs,
bool HasFPRegSaves) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();

LLVM_DEBUG(dbgs() << "Extra spills for " << MF.getName() << ": ");

if (!EnableExtraSpills) {
LLVM_DEBUG(dbgs() << "optimisation not enabled\n");
return 0;
}

// If IPRA is not enabled, nothing will be able to take advantage of the
// extra saved registers.
if (!MF.getTarget().Options.EnableIPRA) {
LLVM_DEBUG(dbgs() << "IPRA disabled\n");
return 0;
}

// These registers will take extra time to save and restore, and will often
// go unused, so only do this at -Oz.
if (!MF.getFunction().hasMinSize()) {
LLVM_DEBUG(dbgs() << "not minsize\n");
return 0;
}

// If we are not currently spilling any registers, we'd need to add an extra
// PUSH/POP pair, so this isn't worth it.
if (!SavedRegs.any()) {
LLVM_DEBUG(dbgs() << "no existing push/pop\n");
return 0;
}

// If we can't guarantee that this definition of the function is the one
// which will be picked by the linker, then IPRA can't make use of any extra
// saved registers.
if (!MF.getFunction().isDefinitionExact()) {
LLVM_DEBUG(dbgs() << "inexact definition\n");
return 0;
}

int NumVisibleCallers = 0;
for (const User *U : MF.getFunction().users()) {
if (const CallBase *Call = dyn_cast<CallBase>(U)) {
if (Call->getCalledOperand() == &MF.getFunction()) {
++NumVisibleCallers;
}
}
}

// If we don't have any direct callers in the current translation unit,
// nothing will be able to take advantage of the extra saved registers.
if (NumVisibleCallers == 0 && !ForceExtraSpills) {
LLVM_DEBUG(dbgs() << "no visible callers\n");
return 0;
}

// If we need to emit unwind tables, these will be longer if we need to
// preserve r0-r3, so we need a lot of visible calls to make this worthwhile.
if (MF.getFunction().needsUnwindTableEntry() && NumVisibleCallers <= 8 &&
!ForceExtraSpills) {
LLVM_DEBUG(dbgs() << "needs unwind table\n");
return 0;
}

// Ok, we've decided we are going to try the optimisation.
LLVM_DEBUG(dbgs() << "enabled\n");

// Compute the registers which can't be preserved because they are either
// modified before the PUSH or after the POP, or are live at the point where
// the POP will be inserted.
BitVector NonPreserveableRegisters;
NonPreserveableRegisters.resize(TRI->getNumRegs());
findRegDefsOutsideSaveRestore(MF, NonPreserveableRegisters);

unsigned NumExtraRegs = 0;

// We'd also like to leave some registers free so that we can use them to
// fold a small SP update into the PUSH/POP. We can't know exactly what this
// optimisation can do, because stack layout isn't finalised, but we can make
// a good enough estimate.
unsigned StackSize = MFI.estimateStackSize(MF);

// If the stack space is large, we probably won't be able to fold the SP
// update into the push/pop, so we should use all the registers we want. If
// we have FP register saves, then the SP update will be folded into the
// VPUSH/VPOP instead, and we can use the GPRs freely.
if (StackSize > 16 || HasFPRegSaves)
StackSize = 0;

LLVM_DEBUG(dbgs() << "Estimated " << StackSize
<< " bytes of SP update being folded into push/pop\n");

for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
if (StackSize) {
StackSize -= 4;
LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
<< ", wanted for SP update\n");
continue;
}

// If we don't modify the register anywhere in this function, IPRA will
// already know that it is preserved, and there's no point in saving it.
if (!MRI.isPhysRegModified(Reg)) {
LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
<< ", not modified\n");
continue;
}

if (NonPreserveableRegisters[Reg]) {
LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
<< ", modified outside save region\n");
continue;
}

LLVM_DEBUG(dbgs() << "also saving " << TRI->getName(Reg) << " for IPRA\n");
SavedRegs.set(Reg);
MRI.enableCalleeSavedRegister(Reg);
++NumExtraRegs;
}

return NumExtraRegs;
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -2260,14 +2007,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< "\n");
}

// When using IPRA, we might want to preserve some of r0-r3, to reduce
// register pressure in our callers.
unsigned ExtraIPRASpills =
spillExtraRegsForIPRA(MF, SavedRegs, NumFPRSpills != 0);
NumGPRSpills += ExtraIPRASpills;
if (ExtraIPRASpills)
CS1Spilled = true;

// Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
// restore LR in that case.
bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
@@ -56,10 +56,6 @@ public:

void getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const override;
void findRegDefsOutsideSaveRestore(MachineFunction &MF,
BitVector &Regs) const;
unsigned spillExtraRegsForIPRA(MachineFunction &MF, BitVector &SavedRegs,
bool HasFPRegSaves) const;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;

@@ -67,8 +63,9 @@ public:
MachineBasicBlock &MBB) const override;

/// Returns true if the target will correctly handle shrink wrapping.
bool enableShrinkWrapping(const MachineFunction &MF) const override;

bool enableShrinkWrapping(const MachineFunction &MF) const override {
return true;
}
bool isProfitableForNoCSROpt(const Function &F) const override {
// The no-CSR optimisation is bad for code size on ARM, because we can save
// many registers with a single PUSH/POP pair.
@@ -2463,24 +2463,25 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));

// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
if (!isTailCall) {
const uint32_t *Mask;
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else {
Mask = ARI->getCallPreservedMask(MF, CallConv);
}

assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
}

if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -216,10 +216,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
break;
}
LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -852,8 +848,7 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
if (!LoRegsToSave.none()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5,
ARM::R6, ARM::R7, ARM::LR}) {
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave[Reg]) {
bool isKill = !MRI.isLiveIn(Reg);
if (isKill && !MRI.isReserved(Reg))
@@ -961,9 +956,6 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
llvm_unreachable("callee-saved register of unexpected class");
}

if (Reg == ARM::LR)
I.setRestored(false);

// If this is a low register not used as the frame pointer, we may want to
// use it for restoring the high registers.
if ((ARM::tGPRRegClass.contains(Reg)) &&
@@ -988,9 +980,6 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7};
static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
static const unsigned AllLoRegs[] = {ARM::R0, ARM::R1, ARM::R2,
ARM::R3, ARM::R4, ARM::R5,
ARM::R6, ARM::R7, ARM::LR};

const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
@@ -1029,10 +1018,16 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));

bool NeedsPop = false;
for (unsigned Reg : AllLoRegs) {
if (!LoRegsToRestore[Reg])
for (unsigned i = CSI.size(); i != 0; --i) {
CalleeSavedInfo &Info = CSI[i-1];
unsigned Reg = Info.getReg();

// High registers (excluding lr) have already been dealt with
if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
continue;

if (Reg == ARM::LR) {
Info.setRestored(false);
if (!MBB.succ_empty() ||
MI->getOpcode() == ARM::TCRETURNdi ||
MI->getOpcode() == ARM::TCRETURNri)
@@ -1,149 +0,0 @@
; RUN: llc -mtriple armv7a--none-eabi -enable-ipra=true -arm-extra-spills -arm-extra-spills-force -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple thumbv7a--none-eabi -enable-ipra=true -arm-extra-spills -arm-extra-spills-force -verify-machineinstrs < %s | FileCheck %s

; Test the interaction between IPRA and C++ exception handling. Currently, IPRA
; only marks registers as preserved on the non-exceptional return path, not in
; the landing pad.

declare dso_local i8* @__cxa_allocate_exception(i32) local_unnamed_addr
declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
declare dso_local i32 @__gxx_personality_v0(...)
declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
declare dso_local void @__cxa_end_catch() local_unnamed_addr

@g = dso_local local_unnamed_addr global i32 0, align 4
@_ZTIi = external dso_local constant i8*

define dso_local i32 @_Z11maybe_throwv() minsize {
; This function might return normally, or might throw an exception. r0 is used
; for a return value, we can preserve r1-r3 for IPRA.
; CHECK: .save {r1, r2, r3, lr}
; CHECK-NEXT: push {r1, r2, r3, lr}
; CHECK: pop{{(..)?}} {r1, r2, r3, pc}
entry:
%0 = load i32, i32* @g, align 4
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
%exception = tail call i8* @__cxa_allocate_exception(i32 4)
%1 = bitcast i8* %exception to i32*
store i32 42, i32* %1, align 8
tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
unreachable

if.else: ; preds = %entry
ret i32 1337
}

; Use inline assembly to force r0-r3 to be alive across a potentially throwing
; call, using them on the non-exceptional return path. r0 is the return value,
; so must be copied to another register. r1-r3 are voluntarily preserved by the
; callee, so can be left in those registers.
define dso_local i32 @_Z25test_non_exceptional_pathv() minsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK: @APP
; CHECK-NEXT: @ def r0-r3
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: mov [[SAVE_R0:r[0-9]+]], r0
; CHECK-NEXT: .Ltmp{{.*}}
; CHECK-NEXT: bl _Z11maybe_throwv
; CHECK: mov r0, [[SAVE_R0]]
; CHECK-NEXT: @APP
; CHECK-NEXT: @ use r0-r3
; CHECK-NEXT: @NO_APP
entry:
%0 = tail call { i32, i32, i32, i32 } asm sideeffect "// def r0-r3", "={r0},={r1},={r2},={r3}"()
%call = invoke i32 @_Z11maybe_throwv()
to label %try.cont unwind label %lpad

lpad: ; preds = %entry
%1 = landingpad { i8*, i32 }
cleanup
catch i8* bitcast (i8** @_ZTIi to i8*)
%2 = extractvalue { i8*, i32 } %1, 1
%3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
%matches = icmp eq i32 %2, %3
br i1 %matches, label %catch, label %ehcleanup

catch: ; preds = %lpad
%4 = extractvalue { i8*, i32 } %1, 0
%5 = tail call i8* @__cxa_begin_catch(i8* %4)
%6 = bitcast i8* %5 to i32*
%7 = load i32, i32* %6, align 4
tail call void @__cxa_end_catch()
br label %cleanup

try.cont: ; preds = %entry
%asmresult3 = extractvalue { i32, i32, i32, i32 } %0, 3
%asmresult2 = extractvalue { i32, i32, i32, i32 } %0, 2
%asmresult1 = extractvalue { i32, i32, i32, i32 } %0, 1
%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
tail call void asm sideeffect "// use r0-r3", "{r0},{r1},{r2},{r3}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3)
br label %cleanup

cleanup: ; preds = %try.cont, %catch
%retval.0 = phi i32 [ 0, %try.cont ], [ %7, %catch ]
ret i32 %retval.0

ehcleanup: ; preds = %lpad
resume { i8*, i32 } %1
}


; Use inline assembly to force r0-r3 to be alive across a potentially throwing
; call, using them after catching the exception. IPRA does not currently mark
; voluntarily preserved registers as live into the landing pad block, so all
; four registers must be copied elsewhere.
define dso_local i32 @_Z21test_exceptional_pathv() local_unnamed_addr minsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK: @APP
; CHECK-NEXT: @ def r0-r3
; CHECK-NEXT: @NO_APP
; CHECK-DAG: mov [[SAVE_R0:r[0-9]+]], r0
; CHECK-DAG: mov [[SAVE_R1:r[0-9]+]], r1
; CHECK-DAG: mov [[SAVE_R2:r[0-9]+]], r2
; CHECK-DAG: mov [[SAVE_R3:r[0-9]+]], r3
; CHECK: bl _Z11maybe_throw

; CHECK: bl __cxa_begin_catch
; CHECK: mov r0, [[SAVE_R0]]
; CHECK-NEXT: mov r1, [[SAVE_R1]]
; CHECK-NEXT: mov r2, [[SAVE_R2]]
; CHECK-NEXT: mov r3, [[SAVE_R3]]
; CHECK-NEXT: @APP
; CHECK-NEXT: @ use r0-r3
; CHECK-NEXT: @NO_APP
entry:
%0 = tail call { i32, i32, i32, i32 } asm sideeffect "// def r0-r3", "={r0},={r1},={r2},={r3}"()
%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
%asmresult1 = extractvalue { i32, i32, i32, i32 } %0, 1
%asmresult2 = extractvalue { i32, i32, i32, i32 } %0, 2
%asmresult3 = extractvalue { i32, i32, i32, i32 } %0, 3
%call = invoke i32 @_Z11maybe_throwv()
to label %cleanup unwind label %lpad

lpad: ; preds = %entry
%1 = landingpad { i8*, i32 }
cleanup
catch i8* bitcast (i8** @_ZTIi to i8*)
%2 = extractvalue { i8*, i32 } %1, 1
%3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
%matches = icmp eq i32 %2, %3
br i1 %matches, label %catch, label %ehcleanup

catch: ; preds = %lpad
%4 = extractvalue { i8*, i32 } %1, 0
%5 = tail call i8* @__cxa_begin_catch(i8* %4)
%6 = bitcast i8* %5 to i32*
%7 = load i32, i32* %6, align 4
tail call void asm sideeffect "// use r0-r3", "{r0},{r1},{r2},{r3}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3)
tail call void @__cxa_end_catch()
br label %cleanup

cleanup: ; preds = %entry, %catch
%retval.0 = phi i32 [ %7, %catch ], [ 0, %entry ]
ret i32 %retval.0

ehcleanup: ; preds = %lpad
resume { i8*, i32 } %1
}
@@ -1,406 +0,0 @@
; RUN: llc -mtriple armv7a--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
; RUN: llc -mtriple thumbv7a--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
; RUN: llc -mtriple thumbv6m--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1

; This clobbers r0, and already needs a push/pop, so we also save and restore
; r0. The push of r11 is to maintain stack alignment (though that isn't
; technically needed in this example).
define void @test_r0_r4() minsize nounwind {
; CHECK-LABEL: test_r0_r4:
; ARM: .save {r0, r4, r11, lr}
; ARM: push {r0, r4, r11, lr}
; ARM: pop {r0, r4, r11, pc}
; THUMB1: .save {r0, r4, r7, lr}
; THUMB1: push {r0, r4, r7, lr}
; THUMB1: pop {r0, r4, r7, pc}
; THUMB2: .save {r0, r4, r7, lr}
; THUMB2: push {r0, r4, r7, lr}
; THUMB2: pop {r0, r4, r7, pc}
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; This clobbers r0-r3, and already needs a push/pop, so we also save and
; restore all of them.
define void @test_r0_r1_r2_r3_r4() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4:
; CHECK: .save {r0, r1, r2, r3, r4, lr}
; CHECK: push {r0, r1, r2, r3, r4, lr}
; CHECK: pop {r0, r1, r2, r3, r4, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
ret void
}

; Check that IPRA does make use of the extra saved registers.
define void @test_ipra() nounwind {
; CHECK-LABEL: test_ipra:
; CHECK: ASM1: r0, r1, r2, r3
; CHECK-NOT: r0
; CHECK-NOT: r1
; CHECK-NOT: r2
; CHECK-NOT: r3
; CHECK: bl test_r0_r1_r2_r3_r4
; CHECK-NOT: r0
; CHECK-NOT: r1
; CHECK-NOT: r2
; CHECK-NOT: r3
; CHECK: ASM2: r0, r1, r2, r3
%regs = call { i32, i32, i32, i32 } asm sideeffect "// ASM1: $0, $1, $2, $3", "={r0},={r1},={r2},={r3}"()
%r0 = extractvalue { i32, i32, i32, i32 } %regs, 0
%r1 = extractvalue { i32, i32, i32, i32 } %regs, 1
%r2 = extractvalue { i32, i32, i32, i32 } %regs, 2
%r3 = extractvalue { i32, i32, i32, i32 } %regs, 3
call void @test_r0_r1_r2_r3_r4()
call void asm sideeffect "// ASM2: $0, $1, $2, $3", "{r0},{r1},{r2},{r3}"(i32 %r0, i32 %r1, i32 %r2, i32 %r3)
ret void
}

; This clobbers r0-r3, but doesn't otherwise need a push/pop, so we don't add
; them.
define void @test_r0_r1_r2_r3() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3:
; CHECK-NOT: push
; CHECK-NOT: pop
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
ret void
}

; This isn't called in this function, so we don't push any extra registers.
define void @test_r0_r4_not_called() minsize nounwind {
; CHECK-LABEL: test_r0_r4_not_called:
; CHECK: .save {r4, lr}
; CHECK: push {r4, lr}
; CHECK: pop {r4, pc}
; CHECK-NOT: push
; CHECK-NOT: pop
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; This function is only optsize, not minsize, so we don't add any extra saves.
define void @test_r0_r4_not_minsize() optsize nounwind {
; CHECK-LABEL: test_r0_r4_not_minsize:
; CHECK: .save {r4, lr}
; CHECK: push {r4, lr}
; CHECK: pop {r4, pc}
; CHECK-NOT: push
; CHECK-NOT: pop
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; This function is not an exact definition (the linker could pick an
; alternative version of it), so we don't add any extra saves.
define linkonce_odr void @test_r0_r4_not_exact() minsize nounwind {
; CHECK-LABEL: test_r0_r4_not_exact:
; CHECK: .save {r4, lr}
; CHECK: push {r4, lr}
; CHECK: pop {r4, pc}
; CHECK-NOT: push
; CHECK-NOT: pop
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; This clobbers r0-r3, but returns a value in r0, so only r1-r3 are saved.
define i32 @test_r0_r1_r2_r3_r4_return_1() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_1:
; ARM: .save {r1, r2, r3, r4, r11, lr}
; ARM: push {r1, r2, r3, r4, r11, lr}
; ARM: pop {r1, r2, r3, r4, r11, pc}
; THUMB1: .save {r1, r2, r3, r4, r7, lr}
; THUMB1: push {r1, r2, r3, r4, r7, lr}
; THUMB1: pop {r1, r2, r3, r4, r7, pc}
; THUMB2: .save {r1, r2, r3, r4, r7, lr}
; THUMB2: push {r1, r2, r3, r4, r7, lr}
; THUMB2: pop {r1, r2, r3, r4, r7, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
ret i32 42
}

; This clobbers r0-r3, but returns a value in r0 and r1, so only r2-r3 are
; saved.
define i64 @test_r0_r1_r2_r3_r4_return_2() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_2:
; CHECK: .save {r2, r3, r4, lr}
; CHECK: push {r2, r3, r4, lr}
; CHECK: pop {r2, r3, r4, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
ret i64 42
}

; This clobbers r0-r3, but returns a value in all of r0-r3, so none of them can
; be saved.
define i128 @test_r0_r1_r2_r3_r4_return_4() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_4:
; CHECK: .save {r4, lr}
; CHECK: push {r4, lr}
; CHECK: pop {r4, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
ret i128 42
}

; This clobbers r0-r3, and returns a value in s0, so all of r0-r3 are saved (we
; previously only checked the number of return registers, ignoring their
; class).
define arm_aapcs_vfpcc float @test_r0_r1_r2_r3_r4_return_float() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_float:
; ARM: .save {r0, r1, r2, r3, r4, lr}
; ARM: push {r0, r1, r2, r3, r4, lr}
; ARM: pop {r0, r1, r2, r3, r4, pc}
; THUMB1: .save {r1, r2, r3, r4, r7, lr}
; THUMB1: push {r1, r2, r3, r4, r7, lr}
; THUMB1: pop {r1, r2, r3, r4, r7, pc}
; THUMB2: .save {r0, r1, r2, r3, r4, lr}
; THUMB2: push {r0, r1, r2, r3, r4, lr}
; THUMB2: pop {r0, r1, r2, r3, r4, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
ret float 42.0
}

; Saving of high registers in thumb1 is more complicated, because they need to
; be copied down to low registers to use push/pop instructions. Luckily, the
; extra registers we are preserving are low registers, which are handled by the
; outer-most push/pop pair, so this doesn't interact badly.
define void @test_save_high_regs() minsize nounwind {
; CHECK-LABEL: test_save_high_regs:
; ARM: .save {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
; ARM: push {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
; ARM: pop {r0, r1, r2, r3, r7, r8, r9, r10, r11, pc}
; THUMB1: .save {r0, r1, r2, r3, r7, lr}
; THUMB1-NEXT: push {r0, r1, r2, r3, r7, lr}
; THUMB1-NEXT: mov lr, r11
; THUMB1-NEXT: mov r7, r10
; THUMB1-NEXT: mov r3, r9
; THUMB1-NEXT: mov r2, r8
; THUMB1-NEXT: .save {r8, r9, r10, r11}
; THUMB1-NEXT: push {r2, r3, r7, lr}
; THUMB1: pop {r0, r1, r2, r3}
; THUMB1-NEXT: mov r8, r0
; THUMB1-NEXT: mov r9, r1
; THUMB1-NEXT: mov r10, r2
; THUMB1-NEXT: mov r11, r3
; THUMB1-NEXT: pop {r0, r1, r2, r3, r7, pc}
; THUMB2: .save {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
; THUMB2: push.w {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
; THUMB2: pop.w {r0, r1, r2, r3, r7, r8, r9, r10, r11, pc}
call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r8},~{r9},~{r10},~{r11}"()
ret void
}

; We can also use extra registers in the PUSH/POP instructions to move the SP
; to make space for local variables. These registers aren't preserved, because
; the space they are saved in is used for the local variable. We try to back
; off the extra-CSRs optimisation to allow this to still happen. In this case,
; there are 8 bytes of stack space needed, so we preserve two argument
; registers and use the other two for the SP update.
define void @test_r0_r1_r2_r3_r4_stack8() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack8:
; CHECK: .save {r2, r3, r4, lr}
; CHECK: push {r0, r1, r2, r3, r4, lr}
; CHECK: pop {r0, r1, r2, r3, r4, pc}
%a = alloca [2 x i32], align 4
call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([2 x i32]* %a, i32 42)
ret void
}

; Check that, when the above function is called, r0 and r1 (used for the SP
; updates) are considered clobbered, and r2 and r3 are preserved.
define void @test_r0_r1_r2_r3_r4_stack8_caller() nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack8_caller:
; CHECK: ASM1: r0, r1, r2, r3
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl test_r0_r1_r2_r3_r4
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: @APP
; CHECK-NEXT: ASM2: r0, r1, r2, r3
%regs = call { i32, i32, i32, i32 } asm sideeffect "// ASM1: $0, $1, $2, $3", "={r0},={r1},={r2},={r3}"()
%r0 = extractvalue { i32, i32, i32, i32 } %regs, 0
%r1 = extractvalue { i32, i32, i32, i32 } %regs, 1
%r2 = extractvalue { i32, i32, i32, i32 } %regs, 2
%r3 = extractvalue { i32, i32, i32, i32 } %regs, 3
call void @test_r0_r1_r2_r3_r4_stack8()
call void asm sideeffect "// ASM2: $0, $1, $2, $3", "{r0},{r1},{r2},{r3}"(i32 %r0, i32 %r1, i32 %r2, i32 %r3)
ret void
}

; Like @test_r0_r1_r2_r3_r4_stack8, but 16 bytes of stack space are needed, so
; all of r0-r3 are used for the SP update, and not preserved.
define void @test_r0_r1_r2_r3_r4_stack16() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack16:
; CHECK: .save {r4, lr}
; CHECK: push {r0, r1, r2, r3, r4, lr}
; CHECK: pop {r0, r1, r2, r3, r4, pc}
%a = alloca [4 x i32], align 4
call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([4 x i32]* %a, i32 42)
ret void
}

; If more than 16 bytes of stack space are needed, it's unlikely that the
; SP-update folding optimisation will succeed, so we revert back to preserving
; r0-r3 for use in our callers.
define void @test_r0_r1_r2_r3_r4_stack24() minsize nounwind {
; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack24:
; CHECK: .save {r0, r1, r2, r3, r4, lr}
; CHECK: push {r0, r1, r2, r3, r4, lr}
; CHECK: pop {r0, r1, r2, r3, r4, pc}
%a = alloca [6 x i32], align 4
call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([6 x i32]* %a, i32 42)
ret void
}

define i32 @tail_callee(i32 %a, i32 %b) minsize nounwind {
entry:
tail call void asm sideeffect "", "~{r2}"()
ret i32 %a
}

; The tail call happens outside the save/restore region, so prevents us from
; preserving some registers. r0 and r1 are outgoing arguments to the tail-call,
; so can't be preserved. r2 is modified inside the tail-called function, so
; can't be preserved. r3 is known to be preserved by the callee, so can be
; preserved. For Thumb1, we can't (efficiently) use a tail-call here, so r1-r3
; are all preserved, with r0 being the return value.
define i32 @test_tail_call() minsize nounwind {
entry:
; CHECK-LABEL: test_tail_call:
; ARM: .save {r3, lr}
; ARM: push {r3, lr}
; ARM: pop {r3, lr}
; ARM: b tail_callee
; THUMB2: .save {r3, lr}
; THUMB2: push {r3, lr}
; THUMB2: pop.w {r3, lr}
; THUMB2: b tail_callee
; THUMB1: .save {r1, r2, r3, lr}
; THUMB1: push {r1, r2, r3, lr}
; THUMB1: bl tail_callee
; THUMB1: pop {r1, r2, r3, pc}
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
%call = tail call i32 @tail_callee(i32 3, i32 4)
ret i32 %call
}

declare i32 @tail_callee_external(i32 %a, i32 %b)

; If we tail-call an external function, it could clobber any of r0-r3.
define i32 @test_tail_call_external() minsize nounwind {
entry:
; CHECK-LABEL: test_tail_call_external:
; ARM: .save {r11, lr}
; ARM: push {r11, lr}
; ARM: pop {r11, lr}
; ARM: b tail_callee_external
; THUMB2: .save {r7, lr}
; THUMB2: push {r7, lr}
; THUMB2: pop.w {r7, lr}
; THUMB2: b tail_callee_external
; THUMB1: .save {r1, r2, r3, lr}
; THUMB1: push {r1, r2, r3, lr}
; THUMB1: bl tail_callee_external
; THUMB1: pop {r1, r2, r3, pc}
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
%call = tail call i32 @tail_callee_external(i32 3, i32 4)
ret i32 %call
}

define linkonce_odr i32 @tail_callee_linkonce_odr(i32 %a, i32 %b) minsize nounwind {
entry:
tail call void asm sideeffect "", "~{r2}"()
ret i32 %a
}

; If a tail-callee has an interposable linkage type (such as linkonce_odr), we
; can't assume the linker will pick the definition we can see, so must assume
; it clobbers all of r0-r3.
define i32 @test_tail_call_linkonce_odr() minsize nounwind {
entry:
; CHECK-LABEL: test_tail_call_linkonce_odr:
; ARM: .save {r11, lr}
; ARM: push {r11, lr}
; ARM: pop {r11, lr}
; ARM: b tail_callee_linkonce_odr
; THUMB2: .save {r7, lr}
; THUMB2: push {r7, lr}
; THUMB2: pop.w {r7, lr}
; THUMB2: b tail_callee_linkonce_odr
; THUMB1: .save {r1, r2, r3, lr}
; THUMB1: push {r1, r2, r3, lr}
; THUMB1: bl tail_callee_linkonce_odr
; THUMB1: pop {r1, r2, r3, pc}
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
%call = tail call i32 @tail_callee_linkonce_odr(i32 3, i32 4)
ret i32 %call
}

; This function doesn't have the nounwind attribute, so unwind tables will be
; emitted. Saving r0-r3 requires a longer unwind instruction sequence, which
; results in an increase in total code size if there are few callers to make
; use of the extra registers.
define void @test_unwind_tables() minsize {
; CHECK-LABEL: test_unwind_tables:
; ARM: .save {r4, lr}
; ARM: push {r4, lr}
; ARM: pop {r4, pc}
; THUMB1: .save {r4, lr}
; THUMB1: push {r4, lr}
; THUMB1: pop {r4, pc}
; THUMB2: .save {r4, lr}
; THUMB2: push {r4, lr}
; THUMB2: pop {r4, pc}
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; This requires an unwind table, but has many call sites, so overall we expect
; the benefits to outweigh the size increase of the unwind table.
define void @test_unwind_tables_many_calls() minsize {
; CHECK-LABEL: test_unwind_tables_many_calls:
; ARM: .save {r0, r4, r11, lr}
; ARM: push {r0, r4, r11, lr}
; ARM: pop {r0, r4, r11, pc}
; THUMB1: .save {r0, r4, r7, lr}
; THUMB1: push {r0, r4, r7, lr}
; THUMB1: pop {r0, r4, r7, pc}
; THUMB2: .save {r0, r4, r7, lr}
; THUMB2: push {r0, r4, r7, lr}
; THUMB2: pop {r0, r4, r7, pc}
call void asm sideeffect "", "~{r0},~{r4}"()
ret void
}

; We don't do this optimisation if there are no callers in the same translation
; unit (otherwise IPRA wouldn't be able to take advantage of the extra saved
; registers), so most functions in this file are called here.
define void @caller() {
; CHECK-LABEL: caller:
call void @test_r0_r4()
call void @test_r0_r1_r2_r3_r4()
call void @test_r0_r1_r2_r3()
call void @test_r0_r4_not_minsize()
call void @test_r0_r4_not_exact()
%t1 = call i32 @test_r0_r1_r2_r3_r4_return_1()
%t2 = call i64 @test_r0_r1_r2_r3_r4_return_2()
%t3 = call i128 @test_r0_r1_r2_r3_r4_return_4()
%t4 = call float @test_r0_r1_r2_r3_r4_return_float()
call void @test_save_high_regs()
call void @test_r0_r1_r2_r3_r4_stack16()
call void @test_r0_r1_r2_r3_r4_stack24()
%t5 = call i32 @test_tail_call()
%t6 = call i32 @test_tail_call_external()
%t7 = call i32 @test_tail_call_linkonce_odr()
call void @test_unwind_tables()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
call void @test_unwind_tables_many_calls()
ret void
}
@@ -66,13 +66,16 @@ return: ; preds = %entry, %if.end
define void @f3(i32 %x) #0 {
; CHECK-LABEL: f3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: bne .LBB2_2
; CHECK-NEXT: @ %bb.1: @ %t
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bleq fn
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: bl fn
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: .LBB2_2: @ %f
; CHECK-NEXT: bx lr
entry:
%p = icmp eq i32 %x, 1
br i1 %p, label %t, label %f