mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[X86] Convert esp-relative movs of function arguments to pushes, step 2
This moves the transformation introduced in r223757 into a separate MI pass. This allows it to cover many more cases (not only cases where there must be a reserved call frame), and perform rudimentary call folding. It still doesn't have a heuristic, so it is enabled only for optsize/minsize, with stack alignment <= 8, where it ought to be a fairly clear win. (Re-commit of r227728) Differential Revision: http://reviews.llvm.org/D6789 llvm-svn: 227752
This commit is contained in:
parent
722b41d62b
commit
41ae9af2e3
@ -193,6 +193,11 @@ public:
|
|||||||
return hasReservedCallFrame(MF) || hasFP(MF);
|
return hasReservedCallFrame(MF) || hasFP(MF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// needsFrameIndexResolution - Do we need to perform FI resolution for
|
||||||
|
// this function. Normally, this is required only when the function
|
||||||
|
// has any stack objects. However, targets may want to override this.
|
||||||
|
virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
|
||||||
|
|
||||||
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
||||||
/// the stack frame of the specified index.
|
/// the stack frame of the specified index.
|
||||||
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
|
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
|
||||||
|
@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
|
|||||||
/// register references and actual offsets.
|
/// register references and actual offsets.
|
||||||
///
|
///
|
||||||
void PEI::replaceFrameIndices(MachineFunction &Fn) {
|
void PEI::replaceFrameIndices(MachineFunction &Fn) {
|
||||||
if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
|
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
|
||||||
|
if (!TFI.needsFrameIndexResolution(Fn)) return;
|
||||||
|
|
||||||
// Store SPAdj at exit of a basic block.
|
// Store SPAdj at exit of a basic block.
|
||||||
SmallVector<int, 8> SPState;
|
SmallVector<int, 8> SPState;
|
||||||
@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are looking at a call sequence, we need to keep track of
|
|
||||||
// the SP adjustment made by each instruction in the sequence.
|
|
||||||
// This includes both the frame setup/destroy pseudos (handled above),
|
|
||||||
// as well as other instructions that have side effects w.r.t the SP.
|
|
||||||
if (InsideCallSequence)
|
|
||||||
SPAdj += TII.getSPAdjust(I);
|
|
||||||
|
|
||||||
MachineInstr *MI = I;
|
MachineInstr *MI = I;
|
||||||
bool DoIncr = true;
|
bool DoIncr = true;
|
||||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||||
@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are looking at a call sequence, we need to keep track of
|
||||||
|
// the SP adjustment made by each instruction in the sequence.
|
||||||
|
// This includes both the frame setup/destroy pseudos (handled above),
|
||||||
|
// as well as other instructions that have side effects w.r.t the SP.
|
||||||
|
// Note that this must come after eliminateFrameIndex, because
|
||||||
|
// if I itself referred to a frame index, we shouldn't count its own
|
||||||
|
// adjustment.
|
||||||
|
if (MI && InsideCallSequence)
|
||||||
|
SPAdj += TII.getSPAdjust(MI);
|
||||||
|
|
||||||
if (DoIncr && I != BB->end()) ++I;
|
if (DoIncr && I != BB->end()) ++I;
|
||||||
|
|
||||||
// Update register states.
|
// Update register states.
|
||||||
|
@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
|||||||
FrameReg = RI->getFrameRegister(MF);
|
FrameReg = RI->getFrameRegister(MF);
|
||||||
return getFrameIndexOffset(MF, FI);
|
return getFrameIndexOffset(MF, FI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TargetFrameLowering::needsFrameIndexResolution(
|
||||||
|
const MachineFunction &MF) const {
|
||||||
|
return MF.getFrameInfo()->hasStackObjects();
|
||||||
|
}
|
||||||
|
@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
|
|||||||
|
|
||||||
set(sources
|
set(sources
|
||||||
X86AsmPrinter.cpp
|
X86AsmPrinter.cpp
|
||||||
|
X86CallFrameOptimization.cpp
|
||||||
X86FastISel.cpp
|
X86FastISel.cpp
|
||||||
X86FloatingPoint.cpp
|
X86FloatingPoint.cpp
|
||||||
X86FrameLowering.cpp
|
X86FrameLowering.cpp
|
||||||
|
@ -64,6 +64,11 @@ FunctionPass *createX86PadShortFunctions();
|
|||||||
/// to eliminate execution delays in some Atom processors.
|
/// to eliminate execution delays in some Atom processors.
|
||||||
FunctionPass *createX86FixupLEAs();
|
FunctionPass *createX86FixupLEAs();
|
||||||
|
|
||||||
|
/// createX86CallFrameOptimization - Return a pass that optimizes
|
||||||
|
/// the code-size of x86 call sequences. This is done by replacing
|
||||||
|
/// esp-relative movs with pushes.
|
||||||
|
FunctionPass *createX86CallFrameOptimization();
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
400
lib/Target/X86/X86CallFrameOptimization.cpp
Normal file
400
lib/Target/X86/X86CallFrameOptimization.cpp
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines a pass that optimizes call sequences on x86.
|
||||||
|
// Currently, it converts movs of function parameters onto the stack into
|
||||||
|
// pushes. This is beneficial for two main reasons:
|
||||||
|
// 1) The push instruction encoding is much smaller than an esp-relative mov
|
||||||
|
// 2) It is possible to push memory arguments directly. So, if the
|
||||||
|
// transformation is performed pre-reg-alloc, it can help relieve
|
||||||
|
// register pressure.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "X86.h"
|
||||||
|
#include "X86InstrInfo.h"
|
||||||
|
#include "X86Subtarget.h"
|
||||||
|
#include "X86MachineFunctionInfo.h"
|
||||||
|
#include "llvm/ADT/Statistic.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/Passes.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "x86-cf-opt"
|
||||||
|
|
||||||
|
// Hidden escape hatch: passing -no-x86-call-frame-opt makes
// shouldPerformTransformation() reject every function. The flag is only
// consulted by this pass, so it gets internal linkage instead of leaking a
// global symbol into the rest of the backend.
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class X86CallFrameOptimization : public MachineFunctionPass {
|
||||||
|
public:
|
||||||
|
X86CallFrameOptimization() : MachineFunctionPass(ID) {}
|
||||||
|
|
||||||
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool shouldPerformTransformation(MachineFunction &MF);
|
||||||
|
|
||||||
|
bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator I);
|
||||||
|
|
||||||
|
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
|
||||||
|
unsigned Reg);
|
||||||
|
|
||||||
|
const char *getPassName() const override {
|
||||||
|
return "X86 Optimize Call Frame";
|
||||||
|
}
|
||||||
|
|
||||||
|
const TargetInstrInfo *TII;
|
||||||
|
const TargetFrameLowering *TFL;
|
||||||
|
const MachineRegisterInfo *MRI;
|
||||||
|
static char ID;
|
||||||
|
};
|
||||||
|
|
||||||
|
char X86CallFrameOptimization::ID = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Factory for the call-frame optimization pass. Returns a freshly allocated
/// pass object.
FunctionPass *llvm::createX86CallFrameOptimization() {
  FunctionPass *Pass = new X86CallFrameOptimization();
  return Pass;
}
|
||||||
|
|
||||||
|
// This checks whether the transformation is legal and profitable
|
||||||
|
bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) {
|
||||||
|
if (NoX86CFOpt.getValue())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We currently only support call sequences where *all* parameters.
|
||||||
|
// are passed on the stack.
|
||||||
|
// No point in running this in 64-bit mode, since some arguments are
|
||||||
|
// passed in-register in all common calling conventions, so the pattern
|
||||||
|
// we're looking for will never match.
|
||||||
|
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
|
||||||
|
if (STI.is64Bit())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// You would expect straight-line code between call-frame setup and
|
||||||
|
// call-frame destroy. You would be wrong. There are circumstances (e.g.
|
||||||
|
// CMOV_GR8 expansion of a select that feeds a function call!) where we can
|
||||||
|
// end up with the setup and the destroy in different basic blocks.
|
||||||
|
// This is bad, and breaks SP adjustment.
|
||||||
|
// So, check that all of the frames in the function are closed inside
|
||||||
|
// the same block, and, for good measure, that there are no nested frames.
|
||||||
|
int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
|
||||||
|
int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
|
||||||
|
for (MachineBasicBlock &BB : MF) {
|
||||||
|
bool InsideFrameSequence = false;
|
||||||
|
for (MachineInstr &MI : BB) {
|
||||||
|
if (MI.getOpcode() == FrameSetupOpcode) {
|
||||||
|
if (InsideFrameSequence)
|
||||||
|
return false;
|
||||||
|
InsideFrameSequence = true;
|
||||||
|
}
|
||||||
|
else if (MI.getOpcode() == FrameDestroyOpcode) {
|
||||||
|
if (!InsideFrameSequence)
|
||||||
|
return false;
|
||||||
|
InsideFrameSequence = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (InsideFrameSequence)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that we know the transformation is legal, check if it is
|
||||||
|
// profitable.
|
||||||
|
// TODO: Add a heuristic that actually looks at the function,
|
||||||
|
// and enable this for more cases.
|
||||||
|
|
||||||
|
// This transformation is always a win when we expected to have
|
||||||
|
// a reserved call frame. Under other circumstances, it may be either
|
||||||
|
// a win or a loss, and requires a heuristic.
|
||||||
|
// For now, enable it only for the relatively clear win cases.
|
||||||
|
bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
|
||||||
|
if (CannotReserveFrame)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// For now, don't even try to evaluate the profitability when
|
||||||
|
// not optimizing for size.
|
||||||
|
AttributeSet FnAttrs = MF.getFunction()->getAttributes();
|
||||||
|
bool OptForSize =
|
||||||
|
FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
|
||||||
|
Attribute::OptimizeForSize) ||
|
||||||
|
FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
|
||||||
|
|
||||||
|
if (!OptForSize)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Stack re-alignment can make this unprofitable even in terms of size.
|
||||||
|
// As mentioned above, a better heuristic is needed. For now, don't do this
|
||||||
|
// when the required alignment is above 8. (4 would be the safe choice, but
|
||||||
|
// some experimentation showed 8 is generally good).
|
||||||
|
if (TFL->getStackAlignment() > 8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pass driver: caches the per-function target hooks, asks
/// shouldPerformTransformation() whether to proceed, and then offers every
/// call-frame setup pseudo in MF to adjustCallSequence().
/// Returns true if any call sequence was rewritten.
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  // The helpers below read these members, so set them before anything else.
  TII = MF.getSubtarget().getInstrInfo();
  TFL = MF.getSubtarget().getFrameLowering();
  MRI = &MF.getRegInfo();

  if (!shouldPerformTransformation(MF))
    return false;

  const int SetupOpc = TII->getCallFrameSetupOpcode();
  bool Modified = false;

  for (MachineBasicBlock &Block : MF) {
    // Block.end() is re-read on every step because adjustCallSequence()
    // inserts and erases instructions in the block being walked.
    MachineBasicBlock::iterator It = Block.begin();
    while (It != Block.end()) {
      if (It->getOpcode() == SetupOpc && adjustCallSequence(MF, Block, It))
        Modified = true;
      ++It;
    }
  }

  return Modified;
}
|
||||||
|
|
||||||
|
/// Tries to convert the argument stores of a single call sequence into
/// pushes.
///
/// \param MF  Function being transformed.
/// \param MBB Block that contains the call sequence.
/// \param I   Iterator at the call-frame setup pseudo that opens the
///            sequence.
/// \returns true if the sequence was rewritten, false if it does not match
///          the supported pattern (in which case nothing is modified).
///
/// The supported pattern is: frame setup, optional LEA32r instructions, a
/// COPY of the stack pointer into a register, a gap-free run of
/// MOV32mi/MOV32mr stores relative to that copy starting at displacement 0,
/// an optional COPY of the global base register, the call, and the frame
/// destroy pseudo.
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator I) {

  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  unsigned StackPtr = RegInfo.getStackRegister();
  int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the beginning of a call sequence.
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;

  // For globals in PIC mode, we can have some LEAs here.
  // Ignore them, they don't bother us.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r)
    ++I;

  // We expect a copy of the stack pointer here. Verify that the source
  // really is the stack register: otherwise stores through some unrelated
  // copied register would be mistaken for argument stores and turned into
  // pushes.
  // TODO: The copy instruction is a lowering artifact.
  //       We should also support a copy-less version, where the stack
  //       pointer is used directly.
  if (!I->isCopy() || !I->getOperand(0).isReg() ||
      !I->getOperand(1).isReg() || I->getOperand(1).getReg() != StackPtr)
    return false;
  MachineBasicBlock::iterator SPCopy = I++;

  // From here on, "the stack pointer" is the register holding the copy.
  StackPtr = SPCopy->getOperand(0).getReg();

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of MOV32mi or MOV32mr
  // instructions, that push a sequence of 32-bit values onto the stack, with
  // no gaps between them.
  // MovVector[k] is the store that fills the 4-byte slot at displacement
  // 4*k. The frame-setup amount bounds the number of slots.
  SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
  if (MaxAdjust > 4)
    MovVector.resize(MaxAdjust, nullptr);

  while (I != MBB.end()) {
    int Opcode = I->getOpcode();
    if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
      break;

    // We only want movs of the form:
    // movl imm/r32, k(%esp)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return false;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp % 4)
      return false;
    StackDisp /= 4;

    assert((size_t)StackDisp < MovVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (MovVector[StackDisp] != nullptr)
      return false;
    MovVector[StackDisp] = I;

    ++I;
  }

  // We now expect the end of the sequence - a call and a stack adjust.
  if (I == MBB.end())
    return false;

  // For PCrel calls, we expect an additional COPY of the basereg.
  // If we find one, skip it.
  if (I->isCopy()) {
    if (I->getOperand(1).getReg() ==
        MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
      ++I;
    else
      return false;
  }

  if (!I->isCall())
    return false;
  MachineBasicBlock::iterator Call = I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return false;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of 32-bit MOVs. ExpectedDist ends up as the number of
  // bytes covered by the leading run of filled slots.
  int64_t ExpectedDist = 0;
  auto MMI = MovVector.begin(), MME = MovVector.end();
  for (; MMI != MME; ++MMI, ExpectedDist += 4)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing.
  if (!ExpectedDist)
    return false;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return false;

  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  FrameSetup->getOperand(1).setImm(ExpectedDist);

  DebugLoc DL = I->getDebugLoc();

  // Folding a load into the push (PUSH32rmm) is skipped on Atom and
  // Silvermont. The answer does not depend on the argument, so compute it
  // once.
  const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
  const bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();

  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses. Every push is inserted right before the call, so walking
  // from the highest slot down leaves slot 0 pushed last.
  for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction. Check that this is legal to fold. Right
      // now, we're extremely conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));

        // Transfer the load's memory operands (its trailing
        // X86::AddrNumOperands operands) onto the push.
        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg);
      }
    }

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
    SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
|
||||||
|
|
||||||
|
/// Performs an extremely restricted form of load folding for a pushed
/// register.
///
/// \param FrameSetup Frame-setup pseudo of the call sequence being rewritten.
/// \param Reg        Register that is about to be pushed.
/// \returns the MOV32rm that defines Reg when it may be folded into the
///          push, or nullptr when folding is not allowed.
///
/// ISel will often create patterns like:
///   movl    4(%edi), %eax
///   movl    8(%edi), %ecx
///   movl    12(%edi), %edx
///   movl    %edx, 8(%esp)
///   movl    %ecx, 4(%esp)
///   movl    %eax, (%esp)
///   call
/// Get rid of those with prejudice.
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Only a virtual register whose sole (non-debug) use is this push
  // qualifies.
  const bool IsCandidateReg =
      TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneNonDBGUse(Reg);
  if (!IsCandidateReg)
    return nullptr;

  MachineInstr *Def = MRI->getVRegDef(Reg);

  // The def must be a MOV from memory.
  if (Def->getOpcode() != X86::MOV32rm)
    return nullptr;

  // If the def is in another block, give up.
  if (Def->getParent() != FrameSetup->getParent())
    return nullptr;

  // Be careful with movs that load from a stack slot, since it may get
  // resolved incorrectly.
  // TODO: Again, we already have the infrastructure, so this should work.
  if (!Def->getOperand(1).isReg())
    return nullptr;

  // Everything from the def up to the ADJCALLSTACK must itself be a MOV32rm.
  // To be less conservative would require duplicating a lot of the logic
  // from PeepholeOptimizer.
  // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
  // to be smarter about folding into pushes.
  MachineBasicBlock::iterator Scan = Def;
  while (Scan != FrameSetup) {
    if (Scan->getOpcode() != X86::MOV32rm)
      return nullptr;
    ++Scan;
  }

  return Def;
}
|
@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
|||||||
// Issue CALLSEQ_START
|
// Issue CALLSEQ_START
|
||||||
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
|
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
|
||||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
|
||||||
.addImm(NumBytes);
|
.addImm(NumBytes).addImm(0);
|
||||||
|
|
||||||
// Walk the register/memloc assignments, inserting copies/loads.
|
// Walk the register/memloc assignments, inserting copies/loads.
|
||||||
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
|
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
|
||||||
|
@ -38,7 +38,34 @@ using namespace llvm;
|
|||||||
extern cl::opt<bool> ForceStackAlign;
|
extern cl::opt<bool> ForceStackAlign;
|
||||||
|
|
||||||
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
||||||
return !MF.getFrameInfo()->hasVarSizedObjects();
|
return !MF.getFrameInfo()->hasVarSizedObjects() &&
|
||||||
|
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
|
||||||
|
/// call frame pseudos can be simplified. Having a FP, as in the default
|
||||||
|
/// implementation, is not sufficient here since we can't always use it.
|
||||||
|
/// Use a more nuanced condition.
|
||||||
|
bool
|
||||||
|
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
|
||||||
|
const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
|
||||||
|
(MF.getSubtarget().getRegisterInfo());
|
||||||
|
return hasReservedCallFrame(MF) ||
|
||||||
|
(hasFP(MF) && !TRI->needsStackRealignment(MF))
|
||||||
|
|| TRI->hasBasePointer(MF);
|
||||||
|
}
|
||||||
|
|
||||||
|
// needsFrameIndexResolution - Do we need to perform FI resolution for
|
||||||
|
// this function. Normally, this is required only when the function
|
||||||
|
// has any stack objects. However, FI resolution actually has another job,
|
||||||
|
// not apparent from the title - it resolves callframesetup/destroy
|
||||||
|
// that were not simplified earlier.
|
||||||
|
// So, this is required for x86 functions that have push sequences even
|
||||||
|
// when there are no stack objects.
|
||||||
|
bool
|
||||||
|
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
|
||||||
|
return MF.getFrameInfo()->hasStackObjects() ||
|
||||||
|
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// hasFP - Return true if the specified function should have a dedicated frame
|
/// hasFP - Return true if the specified function should have a dedicated frame
|
||||||
@ -101,16 +128,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
|
|||||||
return X86::AND32ri;
|
return X86::AND32ri;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
|
|
||||||
// We don't support LP64 for now.
|
|
||||||
assert(!IsLP64);
|
|
||||||
|
|
||||||
if (MO.isImm() && isInt<8>(MO.getImm()))
|
|
||||||
return X86::PUSH32i8;
|
|
||||||
|
|
||||||
return X86::PUSHi32;;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned getLEArOpcode(unsigned IsLP64) {
|
static unsigned getLEArOpcode(unsigned IsLP64) {
|
||||||
return IsLP64 ? X86::LEA64r : X86::LEA32r;
|
return IsLP64 ? X86::LEA64r : X86::LEA32r;
|
||||||
}
|
}
|
||||||
@ -1917,100 +1934,6 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86FrameLowering::
|
|
||||||
convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator I, uint64_t Amount) const {
|
|
||||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
|
||||||
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
|
|
||||||
MF.getSubtarget().getRegisterInfo());
|
|
||||||
unsigned StackPtr = RegInfo.getStackRegister();
|
|
||||||
|
|
||||||
// Scan the call setup sequence for the pattern we're looking for.
|
|
||||||
// We only handle a simple case now - a sequence of MOV32mi or MOV32mr
|
|
||||||
// instructions, that push a sequence of 32-bit values onto the stack, with
|
|
||||||
// no gaps.
|
|
||||||
std::map<int64_t, MachineBasicBlock::iterator> MovMap;
|
|
||||||
do {
|
|
||||||
int Opcode = I->getOpcode();
|
|
||||||
if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
|
|
||||||
break;
|
|
||||||
|
|
||||||
// We only want movs of the form:
|
|
||||||
// movl imm/r32, k(%ecx)
|
|
||||||
// If we run into something else, bail
|
|
||||||
// Note that AddrBaseReg may, counterintuitively, not be a register...
|
|
||||||
if (!I->getOperand(X86::AddrBaseReg).isReg() ||
|
|
||||||
(I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
|
|
||||||
!I->getOperand(X86::AddrScaleAmt).isImm() ||
|
|
||||||
(I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
|
|
||||||
(I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
|
|
||||||
(I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
|
|
||||||
!I->getOperand(X86::AddrDisp).isImm())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
|
|
||||||
|
|
||||||
// We don't want to consider the unaligned case.
|
|
||||||
if (StackDisp % 4)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// If the same stack slot is being filled twice, something's fishy.
|
|
||||||
if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
++I;
|
|
||||||
} while (I != MBB.end());
|
|
||||||
|
|
||||||
// We now expect the end of the sequence - a call and a stack adjust.
|
|
||||||
if (I == MBB.end())
|
|
||||||
return false;
|
|
||||||
if (!I->isCall())
|
|
||||||
return false;
|
|
||||||
MachineBasicBlock::iterator Call = I;
|
|
||||||
if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Now, go through the map, and see that we don't have any gaps,
|
|
||||||
// but only a series of 32-bit MOVs.
|
|
||||||
// Since std::map provides ordered iteration, the original order
|
|
||||||
// of the MOVs doesn't matter.
|
|
||||||
int64_t ExpectedDist = 0;
|
|
||||||
for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
|
|
||||||
++MMI, ExpectedDist += 4)
|
|
||||||
if (MMI->first != ExpectedDist)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Ok, everything looks fine. Do the transformation.
|
|
||||||
DebugLoc DL = I->getDebugLoc();
|
|
||||||
|
|
||||||
// It's possible the original stack adjustment amount was larger than
|
|
||||||
// that done by the pushes. If so, we still need a SUB.
|
|
||||||
Amount -= ExpectedDist;
|
|
||||||
if (Amount) {
|
|
||||||
MachineInstr* Sub = BuildMI(MBB, Call, DL,
|
|
||||||
TII.get(getSUBriOpcode(false, Amount)), StackPtr)
|
|
||||||
.addReg(StackPtr).addImm(Amount);
|
|
||||||
Sub->getOperand(3).setIsDead();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now, iterate through the map in reverse order, and replace the movs
|
|
||||||
// with pushes. MOVmi/MOVmr doesn't have any defs, so no need to replace uses.
|
|
||||||
for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
|
|
||||||
MachineBasicBlock::iterator MOV = MMI->second;
|
|
||||||
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
|
|
||||||
|
|
||||||
// Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
|
|
||||||
int PushOpcode = X86::PUSH32r;
|
|
||||||
if (MOV->getOpcode() == X86::MOV32mi)
|
|
||||||
PushOpcode = getPUSHiOpcode(false, PushOp);
|
|
||||||
|
|
||||||
BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
|
|
||||||
MBB.erase(MOV);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void X86FrameLowering::
|
void X86FrameLowering::
|
||||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator I) const {
|
MachineBasicBlock::iterator I) const {
|
||||||
@ -2025,7 +1948,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
|||||||
bool IsLP64 = STI.isTarget64BitLP64();
|
bool IsLP64 = STI.isTarget64BitLP64();
|
||||||
DebugLoc DL = I->getDebugLoc();
|
DebugLoc DL = I->getDebugLoc();
|
||||||
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
|
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
|
||||||
uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
|
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
|
||||||
I = MBB.erase(I);
|
I = MBB.erase(I);
|
||||||
|
|
||||||
if (!reserveCallFrame) {
|
if (!reserveCallFrame) {
|
||||||
@ -2045,24 +1968,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
|||||||
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
|
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
|
||||||
|
|
||||||
MachineInstr *New = nullptr;
|
MachineInstr *New = nullptr;
|
||||||
if (Opcode == TII.getCallFrameSetupOpcode()) {
|
|
||||||
// Try to convert movs to the stack into pushes.
|
|
||||||
// We currently only look for a pattern that appears in 32-bit
|
|
||||||
// calling conventions.
|
|
||||||
if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
|
|
||||||
return;
|
|
||||||
|
|
||||||
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
|
// Factor out the amount that gets handled inside the sequence
|
||||||
StackPtr)
|
// (Pushes of argument for frame setup, callee pops for frame destroy)
|
||||||
.addReg(StackPtr)
|
Amount -= InternalAmt;
|
||||||
.addImm(Amount);
|
|
||||||
|
if (Amount) {
|
||||||
|
if (Opcode == TII.getCallFrameSetupOpcode()) {
|
||||||
|
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
|
||||||
|
.addReg(StackPtr).addImm(Amount);
|
||||||
} else {
|
} else {
|
||||||
assert(Opcode == TII.getCallFrameDestroyOpcode());
|
assert(Opcode == TII.getCallFrameDestroyOpcode());
|
||||||
|
|
||||||
// Factor out the amount the callee already popped.
|
|
||||||
Amount -= CalleeAmt;
|
|
||||||
|
|
||||||
if (Amount) {
|
|
||||||
unsigned Opc = getADDriOpcode(IsLP64, Amount);
|
unsigned Opc = getADDriOpcode(IsLP64, Amount);
|
||||||
New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
|
New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
|
||||||
.addReg(StackPtr).addImm(Amount);
|
.addReg(StackPtr).addImm(Amount);
|
||||||
@ -2080,13 +1997,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
|
if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
|
||||||
// If we are performing frame pointer elimination and if the callee pops
|
// If we are performing frame pointer elimination and if the callee pops
|
||||||
// something off the stack pointer, add it back. We do this until we have
|
// something off the stack pointer, add it back. We do this until we have
|
||||||
// more advanced stack pointer tracking ability.
|
// more advanced stack pointer tracking ability.
|
||||||
unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
|
unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
|
||||||
MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
|
MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
|
||||||
.addReg(StackPtr).addImm(CalleeAmt);
|
.addReg(StackPtr).addImm(InternalAmt);
|
||||||
|
|
||||||
// The EFLAGS implicit def is dead.
|
// The EFLAGS implicit def is dead.
|
||||||
New->getOperand(3).setIsDead();
|
New->getOperand(3).setIsDead();
|
||||||
|
@ -66,6 +66,8 @@ public:
|
|||||||
|
|
||||||
bool hasFP(const MachineFunction &MF) const override;
|
bool hasFP(const MachineFunction &MF) const override;
|
||||||
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
||||||
|
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
|
||||||
|
bool needsFrameIndexResolution(const MachineFunction &MF) const override;
|
||||||
|
|
||||||
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
|
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
|
||||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||||
|
@ -43,15 +43,18 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
|
|||||||
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
|
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
|
||||||
// sub / add which can clobber EFLAGS.
|
// sub / add which can clobber EFLAGS.
|
||||||
let Defs = [ESP, EFLAGS], Uses = [ESP] in {
|
let Defs = [ESP, EFLAGS], Uses = [ESP] in {
|
||||||
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
|
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||||
"#ADJCALLSTACKDOWN",
|
"#ADJCALLSTACKDOWN",
|
||||||
[(X86callseq_start timm:$amt)]>,
|
[]>,
|
||||||
Requires<[NotLP64]>;
|
Requires<[NotLP64]>;
|
||||||
def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||||
"#ADJCALLSTACKUP",
|
"#ADJCALLSTACKUP",
|
||||||
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
|
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
|
||||||
Requires<[NotLP64]>;
|
Requires<[NotLP64]>;
|
||||||
}
|
}
|
||||||
|
def : Pat<(X86callseq_start timm:$amt1),
|
||||||
|
(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
|
||||||
|
|
||||||
|
|
||||||
// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
|
// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
|
||||||
// a stack adjustment and the codegen must know that they may modify the stack
|
// a stack adjustment and the codegen must know that they may modify the stack
|
||||||
@ -59,16 +62,17 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
|||||||
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
|
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
|
||||||
// sub / add which can clobber EFLAGS.
|
// sub / add which can clobber EFLAGS.
|
||||||
let Defs = [RSP, EFLAGS], Uses = [RSP] in {
|
let Defs = [RSP, EFLAGS], Uses = [RSP] in {
|
||||||
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
|
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||||
"#ADJCALLSTACKDOWN",
|
"#ADJCALLSTACKDOWN",
|
||||||
[(X86callseq_start timm:$amt)]>,
|
[]>,
|
||||||
Requires<[IsLP64]>;
|
Requires<[IsLP64]>;
|
||||||
def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||||
"#ADJCALLSTACKUP",
|
"#ADJCALLSTACKUP",
|
||||||
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
|
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
|
||||||
Requires<[IsLP64]>;
|
Requires<[IsLP64]>;
|
||||||
}
|
}
|
||||||
|
def : Pat<(X86callseq_start timm:$amt1),
|
||||||
|
(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
|
||||||
|
|
||||||
|
|
||||||
// x86-64 va_start lowering magic.
|
// x86-64 va_start lowering magic.
|
||||||
|
@ -1804,6 +1804,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
|
||||||
|
const MachineFunction *MF = MI->getParent()->getParent();
|
||||||
|
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
|
||||||
|
|
||||||
|
if (MI->getOpcode() == getCallFrameSetupOpcode() ||
|
||||||
|
MI->getOpcode() == getCallFrameDestroyOpcode()) {
|
||||||
|
unsigned StackAlign = TFI->getStackAlignment();
|
||||||
|
int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
|
||||||
|
StackAlign;
|
||||||
|
|
||||||
|
SPAdj -= MI->getOperand(1).getImm();
|
||||||
|
|
||||||
|
if (MI->getOpcode() == getCallFrameSetupOpcode())
|
||||||
|
return SPAdj;
|
||||||
|
else
|
||||||
|
return -SPAdj;
|
||||||
|
}
|
||||||
|
|
||||||
|
// To know whether a call adjusts the stack, we need information
|
||||||
|
// that is bound to the following ADJCALLSTACKUP pseudo.
|
||||||
|
// Look for the next ADJCALLSTACKUP that follows the call.
|
||||||
|
if (MI->isCall()) {
|
||||||
|
const MachineBasicBlock* MBB = MI->getParent();
|
||||||
|
auto I = ++MachineBasicBlock::const_iterator(MI);
|
||||||
|
for (auto E = MBB->end(); I != E; ++I) {
|
||||||
|
if (I->getOpcode() == getCallFrameDestroyOpcode() ||
|
||||||
|
I->isCall())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we could not find a frame destroy opcode, then it has already
|
||||||
|
// been simplified, so we don't care.
|
||||||
|
if (I->getOpcode() != getCallFrameDestroyOpcode())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return -(I->getOperand(1).getImm());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently handle only PUSHes we can reasonably expect to see
|
||||||
|
// in call sequences
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
case X86::PUSH32i8:
|
||||||
|
case X86::PUSH32r:
|
||||||
|
case X86::PUSH32rmm:
|
||||||
|
case X86::PUSH32rmr:
|
||||||
|
case X86::PUSHi32:
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// isFrameOperand - Return true and the FrameIndex if the specified
|
/// isFrameOperand - Return true and the FrameIndex if the specified
|
||||||
/// operand and follow operands form a reference to the stack frame.
|
/// operand and follow operands form a reference to the stack frame.
|
||||||
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
|
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
|
||||||
|
@ -175,6 +175,11 @@ public:
|
|||||||
///
|
///
|
||||||
const X86RegisterInfo &getRegisterInfo() const { return RI; }
|
const X86RegisterInfo &getRegisterInfo() const { return RI; }
|
||||||
|
|
||||||
|
/// getSPAdjust - This returns the stack pointer adjustment made by
|
||||||
|
/// this instruction. For x86, we need to handle more complex call
|
||||||
|
/// sequences involving PUSHes.
|
||||||
|
int getSPAdjust(const MachineInstr *MI) const override;
|
||||||
|
|
||||||
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
|
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
|
||||||
/// extension instruction. That is, it's like a copy where it's legal for the
|
/// extension instruction. That is, it's like a copy where it's legal for the
|
||||||
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
|
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
|
||||||
|
@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||||||
unsigned ArgumentStackSize;
|
unsigned ArgumentStackSize;
|
||||||
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
|
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
|
||||||
unsigned NumLocalDynamics;
|
unsigned NumLocalDynamics;
|
||||||
|
/// HasPushSequences - Keeps track of whether this function uses sequences
|
||||||
|
/// of pushes to pass function parameters.
|
||||||
|
bool HasPushSequences;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// ForwardedMustTailRegParms - A list of virtual and physical registers
|
/// ForwardedMustTailRegParms - A list of virtual and physical registers
|
||||||
@ -97,7 +100,8 @@ public:
|
|||||||
VarArgsGPOffset(0),
|
VarArgsGPOffset(0),
|
||||||
VarArgsFPOffset(0),
|
VarArgsFPOffset(0),
|
||||||
ArgumentStackSize(0),
|
ArgumentStackSize(0),
|
||||||
NumLocalDynamics(0) {}
|
NumLocalDynamics(0),
|
||||||
|
HasPushSequences(false) {}
|
||||||
|
|
||||||
explicit X86MachineFunctionInfo(MachineFunction &MF)
|
explicit X86MachineFunctionInfo(MachineFunction &MF)
|
||||||
: ForceFramePointer(false),
|
: ForceFramePointer(false),
|
||||||
@ -113,11 +117,15 @@ public:
|
|||||||
VarArgsGPOffset(0),
|
VarArgsGPOffset(0),
|
||||||
VarArgsFPOffset(0),
|
VarArgsFPOffset(0),
|
||||||
ArgumentStackSize(0),
|
ArgumentStackSize(0),
|
||||||
NumLocalDynamics(0) {}
|
NumLocalDynamics(0),
|
||||||
|
HasPushSequences(false) {}
|
||||||
|
|
||||||
bool getForceFramePointer() const { return ForceFramePointer;}
|
bool getForceFramePointer() const { return ForceFramePointer;}
|
||||||
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
||||||
|
|
||||||
|
bool getHasPushSequences() const { return HasPushSequences; }
|
||||||
|
void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; }
|
||||||
|
|
||||||
bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
|
bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
|
||||||
void setRestoreBasePointer(const MachineFunction *MF);
|
void setRestoreBasePointer(const MachineFunction *MF);
|
||||||
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
|
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
|
||||||
|
@ -468,8 +468,6 @@ void
|
|||||||
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
int SPAdj, unsigned FIOperandNum,
|
int SPAdj, unsigned FIOperandNum,
|
||||||
RegScavenger *RS) const {
|
RegScavenger *RS) const {
|
||||||
assert(SPAdj == 0 && "Unexpected");
|
|
||||||
|
|
||||||
MachineInstr &MI = *II;
|
MachineInstr &MI = *II;
|
||||||
MachineFunction &MF = *MI.getParent()->getParent();
|
MachineFunction &MF = *MI.getParent()->getParent();
|
||||||
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
|
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
|
||||||
@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||||||
} else
|
} else
|
||||||
FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
|
FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
|
||||||
|
|
||||||
|
if (BasePtr == StackPtr)
|
||||||
|
FIOffset += SPAdj;
|
||||||
|
|
||||||
// The frame index format for stackmaps and patchpoints is different from the
|
// The frame index format for stackmaps and patchpoints is different from the
|
||||||
// X86 format. It only has a FI and an offset.
|
// X86 format. It only has a FI and an offset.
|
||||||
if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
|
if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
|
||||||
|
@ -193,6 +193,7 @@ public:
|
|||||||
void addIRPasses() override;
|
void addIRPasses() override;
|
||||||
bool addInstSelector() override;
|
bool addInstSelector() override;
|
||||||
bool addILPOpts() override;
|
bool addILPOpts() override;
|
||||||
|
void addPreRegAlloc() override;
|
||||||
void addPostRegAlloc() override;
|
void addPostRegAlloc() override;
|
||||||
void addPreEmitPass() override;
|
void addPreEmitPass() override;
|
||||||
};
|
};
|
||||||
@ -226,6 +227,10 @@ bool X86PassConfig::addILPOpts() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void X86PassConfig::addPreRegAlloc() {
|
||||||
|
addPass(createX86CallFrameOptimization());
|
||||||
|
}
|
||||||
|
|
||||||
void X86PassConfig::addPostRegAlloc() {
|
void X86PassConfig::addPostRegAlloc() {
|
||||||
addPass(createX86FloatingPointStackifierPass());
|
addPass(createX86FloatingPointStackifierPass());
|
||||||
}
|
}
|
||||||
|
@ -31,7 +31,7 @@ blah:
|
|||||||
to label %invoke.cont unwind label %lpad
|
to label %invoke.cont unwind label %lpad
|
||||||
|
|
||||||
; Uses end as sret param.
|
; Uses end as sret param.
|
||||||
; CHECK: movl %[[end]], (%esp)
|
; CHECK: pushl %[[end]]
|
||||||
; CHECK: calll _plus
|
; CHECK: calll _plus
|
||||||
|
|
||||||
invoke.cont:
|
invoke.cont:
|
||||||
|
@ -1,24 +1,65 @@
|
|||||||
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
|
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
|
||||||
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
|
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
|
||||||
|
|
||||||
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
|
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
|
||||||
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
|
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
|
||||||
|
|
||||||
; Here, we should have a reserved frame, so we don't expect pushes
|
; Here, we should have a reserved frame, so we don't expect pushes
|
||||||
; NORMAL-LABEL: test1
|
; NORMAL-LABEL: test1:
|
||||||
; NORMAL: subl $16, %esp
|
; NORMAL: subl $16, %esp
|
||||||
; NORMAL-NEXT: movl $4, 12(%esp)
|
; NORMAL-NEXT: movl $4, 12(%esp)
|
||||||
; NORMAL-NEXT: movl $3, 8(%esp)
|
; NORMAL-NEXT: movl $3, 8(%esp)
|
||||||
; NORMAL-NEXT: movl $2, 4(%esp)
|
; NORMAL-NEXT: movl $2, 4(%esp)
|
||||||
; NORMAL-NEXT: movl $1, (%esp)
|
; NORMAL-NEXT: movl $1, (%esp)
|
||||||
; NORMAL-NEXT: call
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
define void @test1() {
|
define void @test1() {
|
||||||
entry:
|
entry:
|
||||||
call void @good(i32 1, i32 2, i32 3, i32 4)
|
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Here, we expect a sequence of 4 immediate pushes
|
; We're optimizing for code size, so we should get pushes for x86,
|
||||||
; NORMAL-LABEL: test2
|
; even though there is a reserved call frame.
|
||||||
|
; Make sure we don't touch x86-64
|
||||||
|
; NORMAL-LABEL: test1b:
|
||||||
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
|
; NORMAL: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl $3
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
; X64-LABEL: test1b:
|
||||||
|
; X64: movl $1, %ecx
|
||||||
|
; X64-NEXT: movl $2, %edx
|
||||||
|
; X64-NEXT: movl $3, %r8d
|
||||||
|
; X64-NEXT: movl $4, %r9d
|
||||||
|
; X64-NEXT: callq good
|
||||||
|
define void @test1b() optsize {
|
||||||
|
entry:
|
||||||
|
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Same as above, but for minsize
|
||||||
|
; NORMAL-LABEL: test1c:
|
||||||
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
|
; NORMAL: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl $3
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test1c() minsize {
|
||||||
|
entry:
|
||||||
|
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; If we don't have a reserved frame, we should have pushes
|
||||||
|
; NORMAL-LABEL: test2:
|
||||||
; NORMAL-NOT: subl {{.*}} %esp
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
; NORMAL: pushl $4
|
; NORMAL: pushl $4
|
||||||
; NORMAL-NEXT: pushl $3
|
; NORMAL-NEXT: pushl $3
|
||||||
@ -34,53 +75,53 @@ entry:
|
|||||||
|
|
||||||
; Again, we expect a sequence of 4 immediate pushes
|
; Again, we expect a sequence of 4 immediate pushes
|
||||||
; Checks that we generate the right pushes for >8bit immediates
|
; Checks that we generate the right pushes for >8bit immediates
|
||||||
; NORMAL-LABEL: test2b
|
; NORMAL-LABEL: test2b:
|
||||||
; NORMAL-NOT: subl {{.*}} %esp
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
; NORMAL: pushl $4096
|
; NORMAL: pushl $4096
|
||||||
; NORMAL-NEXT: pushl $3072
|
; NORMAL-NEXT: pushl $3072
|
||||||
; NORMAL-NEXT: pushl $2048
|
; NORMAL-NEXT: pushl $2048
|
||||||
; NORMAL-NEXT: pushl $1024
|
; NORMAL-NEXT: pushl $1024
|
||||||
; NORMAL-NEXT: call
|
; NORMAL-NEXT: call
|
||||||
define void @test2b(i32 %k) {
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test2b() optsize {
|
||||||
entry:
|
entry:
|
||||||
%a = alloca i32, i32 %k
|
|
||||||
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
|
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; The first push should push a register
|
; The first push should push a register
|
||||||
; NORMAL-LABEL: test3
|
; NORMAL-LABEL: test3:
|
||||||
; NORMAL-NOT: subl {{.*}} %esp
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
; NORMAL: pushl $4
|
; NORMAL: pushl $4
|
||||||
; NORMAL-NEXT: pushl $3
|
; NORMAL-NEXT: pushl $3
|
||||||
; NORMAL-NEXT: pushl $2
|
; NORMAL-NEXT: pushl $2
|
||||||
; NORMAL-NEXT: pushl %e{{..}}
|
; NORMAL-NEXT: pushl %e{{..}}
|
||||||
; NORMAL-NEXT: call
|
; NORMAL-NEXT: call
|
||||||
define void @test3(i32 %k) {
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test3(i32 %k) optsize {
|
||||||
entry:
|
entry:
|
||||||
%a = alloca i32, i32 %k
|
|
||||||
call void @good(i32 %k, i32 2, i32 3, i32 4)
|
call void @good(i32 %k, i32 2, i32 3, i32 4)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; We don't support weird calling conventions
|
; We don't support weird calling conventions
|
||||||
; NORMAL-LABEL: test4
|
; NORMAL-LABEL: test4:
|
||||||
; NORMAL: subl $12, %esp
|
; NORMAL: subl $12, %esp
|
||||||
; NORMAL-NEXT: movl $4, 8(%esp)
|
; NORMAL-NEXT: movl $4, 8(%esp)
|
||||||
; NORMAL-NEXT: movl $3, 4(%esp)
|
; NORMAL-NEXT: movl $3, 4(%esp)
|
||||||
; NORMAL-NEXT: movl $1, (%esp)
|
; NORMAL-NEXT: movl $1, (%esp)
|
||||||
; NORMAL-NEXT: movl $2, %eax
|
; NORMAL-NEXT: movl $2, %eax
|
||||||
; NORMAL-NEXT: call
|
; NORMAL-NEXT: call
|
||||||
define void @test4(i32 %k) {
|
; NORMAL-NEXT: addl $12, %esp
|
||||||
|
define void @test4() optsize {
|
||||||
entry:
|
entry:
|
||||||
%a = alloca i32, i32 %k
|
|
||||||
call void @inreg(i32 1, i32 2, i32 3, i32 4)
|
call void @inreg(i32 1, i32 2, i32 3, i32 4)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Check that additional alignment is added when the pushes
|
; When there is no reserved call frame, check that additional alignment
|
||||||
; don't add up to the required alignment.
|
; is added when the pushes don't add up to the required alignment.
|
||||||
; ALIGNED-LABEL: test5
|
; ALIGNED-LABEL: test5:
|
||||||
; ALIGNED: subl $16, %esp
|
; ALIGNED: subl $16, %esp
|
||||||
; ALIGNED-NEXT: pushl $4
|
; ALIGNED-NEXT: pushl $4
|
||||||
; ALIGNED-NEXT: pushl $3
|
; ALIGNED-NEXT: pushl $3
|
||||||
@ -97,7 +138,7 @@ entry:
|
|||||||
; Check that pushing the addresses of globals (Or generally, things that
|
; Check that pushing the addresses of globals (Or generally, things that
|
||||||
; aren't exactly immediates) isn't broken.
|
; aren't exactly immediates) isn't broken.
|
||||||
; Fixes PR21878.
|
; Fixes PR21878.
|
||||||
; NORMAL-LABEL: test6
|
; NORMAL-LABEL: test6:
|
||||||
; NORMAL: pushl $_ext
|
; NORMAL: pushl $_ext
|
||||||
; NORMAL-NEXT: call
|
; NORMAL-NEXT: call
|
||||||
declare void @f(i8*)
|
declare void @f(i8*)
|
||||||
@ -110,3 +151,108 @@ bb:
|
|||||||
alloca i32
|
alloca i32
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Check that we fold simple cases into the push
|
||||||
|
; NORMAL-LABEL: test7:
|
||||||
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
|
; NORMAL: movl 4(%esp), [[EAX:%e..]]
|
||||||
|
; NORMAL-NEXT: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl ([[EAX]])
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test7(i32* %ptr) optsize {
|
||||||
|
entry:
|
||||||
|
%val = load i32* %ptr
|
||||||
|
call void @good(i32 1, i32 2, i32 %val, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; But we don't want to fold stack-relative loads into the push,
|
||||||
|
; because the offset will be wrong
|
||||||
|
; NORMAL-LABEL: test8:
|
||||||
|
; NORMAL-NOT: subl {{.*}} %esp
|
||||||
|
; NORMAL: movl 4(%esp), [[EAX:%e..]]
|
||||||
|
; NORMAL-NEXT: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl [[EAX]]
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test8(i32* %ptr) optsize {
|
||||||
|
entry:
|
||||||
|
%val = ptrtoint i32* %ptr to i32
|
||||||
|
call void @good(i32 1, i32 2, i32 %val, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; If one function is using push instructions, and the other isn't
|
||||||
|
; (because it has frame-index references), then we must resolve
|
||||||
|
; these references correctly.
|
||||||
|
; NORMAL-LABEL: test9:
|
||||||
|
; NORMAL-NOT: leal (%esp),
|
||||||
|
; NORMAL: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl $3
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
; NORMAL-NEXT: subl $16, %esp
|
||||||
|
; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
|
||||||
|
; NORMAL-NEXT: movl [[EAX]], 12(%esp)
|
||||||
|
; NORMAL-NEXT: movl $7, 8(%esp)
|
||||||
|
; NORMAL-NEXT: movl $6, 4(%esp)
|
||||||
|
; NORMAL-NEXT: movl $5, (%esp)
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test9() optsize {
|
||||||
|
entry:
|
||||||
|
%p = alloca i32, align 4
|
||||||
|
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||||
|
%0 = ptrtoint i32* %p to i32
|
||||||
|
call void @good(i32 5, i32 6, i32 7, i32 %0)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; We can end up with an indirect call which gets reloaded on the spot.
|
||||||
|
; Make sure we reference the correct stack slot - we spill into (%esp)
|
||||||
|
; and reload from 16(%esp) due to the pushes.
|
||||||
|
; NORMAL-LABEL: test10:
|
||||||
|
; NORMAL: movl $_good, [[ALLOC:.*]]
|
||||||
|
; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
|
||||||
|
; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
|
||||||
|
; NORMAL: nop
|
||||||
|
; NORMAL: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl $3
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl $1
|
||||||
|
; NORMAL-NEXT: calll *16(%esp)
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
define void @test10() optsize {
|
||||||
|
%stack_fptr = alloca void (i32, i32, i32, i32)*
|
||||||
|
store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
|
||||||
|
%good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
|
||||||
|
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
|
||||||
|
call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; We can't fold the load from the global into the push because of
|
||||||
|
; interference from the store
|
||||||
|
; NORMAL-LABEL: test11:
|
||||||
|
; NORMAL: movl _the_global, [[EAX:%e..]]
|
||||||
|
; NORMAL-NEXT: movl $42, _the_global
|
||||||
|
; NORMAL-NEXT: pushl $4
|
||||||
|
; NORMAL-NEXT: pushl $3
|
||||||
|
; NORMAL-NEXT: pushl $2
|
||||||
|
; NORMAL-NEXT: pushl [[EAX]]
|
||||||
|
; NORMAL-NEXT: call
|
||||||
|
; NORMAL-NEXT: addl $16, %esp
|
||||||
|
@the_global = external global i32
|
||||||
|
define void @test11() optsize {
|
||||||
|
%myload = load i32* @the_global
|
||||||
|
store i32 42, i32* @the_global
|
||||||
|
call void @good(i32 %myload, i32 2, i32 3, i32 4)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user