mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AArch64] Implement stack probing for windows
Differential Revision: https://reviews.llvm.org/D41131 llvm-svn: 321150
This commit is contained in:
parent
a9fd04c09d
commit
786b1663a9
@ -288,3 +288,31 @@ standard stack probe emission.
|
|||||||
|
|
||||||
The MSVC environment does not emit code for VLAs currently.
|
The MSVC environment does not emit code for VLAs currently.
|
||||||
|
|
||||||
|
Windows on ARM64
|
||||||
|
----------------
|
||||||
|
|
||||||
|
Stack Probe Emission
|
||||||
|
^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The reference implementation (Microsoft Visual Studio 2017) emits stack probes
|
||||||
|
in the following fashion:
|
||||||
|
|
||||||
|
.. code-block:: gas
|
||||||
|
|
||||||
|
mov x15, #constant
|
||||||
|
bl __chkstk
|
||||||
|
sub sp, sp, x15, lsl #4
|
||||||
|
|
||||||
|
However, the direct ``bl`` branch limits the reach to 256 MiB (±128 MiB). In order to accommodate
|
||||||
|
larger binaries, LLVM supports the use of ``-mcmodel=large`` to allow an 8GiB
|
||||||
|
(±4GiB) range via a slight deviation from the reference sequence. It will generate an indirect call as
|
||||||
|
follows:
|
||||||
|
|
||||||
|
.. code-block:: gas
|
||||||
|
|
||||||
|
mov x15, #constant
|
||||||
|
adrp x16, __chkstk
|
||||||
|
add x16, x16, :lo12:__chkstk
|
||||||
|
blr x16
|
||||||
|
sub sp, sp, x15, lsl #4
|
||||||
|
|
||||||
|
@ -97,6 +97,7 @@
|
|||||||
#include "AArch64RegisterInfo.h"
|
#include "AArch64RegisterInfo.h"
|
||||||
#include "AArch64Subtarget.h"
|
#include "AArch64Subtarget.h"
|
||||||
#include "AArch64TargetMachine.h"
|
#include "AArch64TargetMachine.h"
|
||||||
|
#include "MCTargetDesc/AArch64AddressingModes.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||||
@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsPrologue(
|
|||||||
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
|
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decide whether a frame of StackSizeInBytes bytes needs a stack probe.
//
// Always returns false when the subtarget is not a Windows target. Otherwise
// the frame size is compared against a probe threshold that starts at 4096
// bytes and can be replaced per function through the "stack-probe-size"
// function attribute.
//
// MF               - function being lowered; supplies the subtarget and the
//                    IR function whose attributes are consulted.
// StackSizeInBytes - frame size (or estimate) to test against the threshold.
//
// Returns true when StackSizeInBytes is greater than or equal to the
// threshold.
static bool windowsRequiresStackProbe(MachineFunction &MF,
|
||||||
|
unsigned StackSizeInBytes) {
|
||||||
|
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
||||||
|
if (!Subtarget.isTargetWindows())
|
||||||
|
return false;
|
||||||
|
const Function &F = MF.getFunction();
|
||||||
|
// TODO: When implementing stack protectors, take that into account
|
||||||
|
// for the probe threshold.
|
||||||
|
unsigned StackProbeSize = 4096;
|
||||||
|
// Parse the attribute's string value into StackProbeSize; the status
|
||||||
|
// returned by getAsInteger is not checked here.
|
||||||
|
if (F.hasFnAttribute("stack-probe-size"))
|
||||||
|
F.getFnAttribute("stack-probe-size")
|
||||||
|
.getValueAsString()
|
||||||
|
.getAsInteger(0, StackProbeSize);
|
||||||
|
// Inclusive comparison: a frame exactly at the threshold is probed too.
|
||||||
|
return StackSizeInBytes >= StackProbeSize;
|
||||||
|
}
|
||||||
|
|
||||||
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
|
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
|
||||||
MachineFunction &MF, unsigned StackBumpBytes) const {
|
MachineFunction &MF, unsigned StackBumpBytes) const {
|
||||||
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||||
@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
|
|||||||
|
|
||||||
// 512 is the maximum immediate for stp/ldp that will be used for
|
// 512 is the maximum immediate for stp/ldp that will be used for
|
||||||
// callee-save save/restores
|
// callee-save save/restores
|
||||||
if (StackBumpBytes >= 512)
|
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (MFI.hasVarSizedObjects())
|
if (MFI.hasVarSizedObjects())
|
||||||
@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
int NumBytes = (int)MFI.getStackSize();
|
int NumBytes = (int)MFI.getStackSize();
|
||||||
if (!AFI->hasStackFrame()) {
|
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
|
||||||
assert(!HasFP && "unexpected function without stack frame but with FP");
|
assert(!HasFP && "unexpected function without stack frame but with FP");
|
||||||
|
|
||||||
// All of the stack allocation is for locals.
|
// All of the stack allocation is for locals.
|
||||||
@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
MachineInstr::FrameSetup);
|
MachineInstr::FrameSetup);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (windowsRequiresStackProbe(MF, NumBytes)) {
|
||||||
|
uint32_t NumWords = NumBytes >> 4;
|
||||||
|
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
|
||||||
|
.addImm(NumWords)
|
||||||
|
.setMIFlags(MachineInstr::FrameSetup);
|
||||||
|
|
||||||
|
switch (MF.getTarget().getCodeModel()) {
|
||||||
|
case CodeModel::Small:
|
||||||
|
case CodeModel::Medium:
|
||||||
|
case CodeModel::Kernel:
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
|
||||||
|
.addExternalSymbol("__chkstk")
|
||||||
|
.addReg(AArch64::X15, RegState::Implicit)
|
||||||
|
.setMIFlags(MachineInstr::FrameSetup);
|
||||||
|
break;
|
||||||
|
case CodeModel::Large:
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
|
||||||
|
.addReg(AArch64::X16, RegState::Define)
|
||||||
|
.addExternalSymbol("__chkstk")
|
||||||
|
.addExternalSymbol("__chkstk")
|
||||||
|
.setMIFlags(MachineInstr::FrameSetup);
|
||||||
|
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
|
||||||
|
.addReg(AArch64::X16, RegState::Kill)
|
||||||
|
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
|
||||||
|
.setMIFlags(MachineInstr::FrameSetup);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
|
||||||
|
.addReg(AArch64::SP, RegState::Kill)
|
||||||
|
.addReg(AArch64::X15, RegState::Kill)
|
||||||
|
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
|
||||||
|
.setMIFlags(MachineInstr::FrameSetup);
|
||||||
|
NumBytes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Allocate space for the rest of the frame.
|
// Allocate space for the rest of the frame.
|
||||||
if (NumBytes) {
|
if (NumBytes) {
|
||||||
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
|
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
|
||||||
@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
unsigned UnspilledCSGPR = AArch64::NoRegister;
|
unsigned UnspilledCSGPR = AArch64::NoRegister;
|
||||||
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
|
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
|
||||||
|
|
||||||
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
|
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
|
||||||
|
|
||||||
|
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
|
||||||
|
? RegInfo->getBaseRegister()
|
||||||
|
: (unsigned)AArch64::NoRegister;
|
||||||
|
|
||||||
|
unsigned SpillEstimate = SavedRegs.count();
|
||||||
|
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||||
|
unsigned Reg = CSRegs[i];
|
||||||
|
unsigned PairedReg = CSRegs[i ^ 1];
|
||||||
|
if (Reg == BasePointerReg)
|
||||||
|
SpillEstimate++;
|
||||||
|
if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
|
||||||
|
SpillEstimate++;
|
||||||
|
}
|
||||||
|
SpillEstimate += 2; // Conservatively include FP+LR in the estimate
|
||||||
|
unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
|
||||||
|
|
||||||
// The frame record needs to be created by saving the appropriate registers
|
// The frame record needs to be created by saving the appropriate registers
|
||||||
if (hasFP(MF)) {
|
if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
|
||||||
SavedRegs.set(AArch64::FP);
|
SavedRegs.set(AArch64::FP);
|
||||||
SavedRegs.set(AArch64::LR);
|
SavedRegs.set(AArch64::LR);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned BasePointerReg = AArch64::NoRegister;
|
|
||||||
if (RegInfo->hasBasePointer(MF))
|
|
||||||
BasePointerReg = RegInfo->getBaseRegister();
|
|
||||||
|
|
||||||
unsigned ExtraCSSpill = 0;
|
unsigned ExtraCSSpill = 0;
|
||||||
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
|
|
||||||
// Figure out which callee-saved registers to save/restore.
|
// Figure out which callee-saved registers to save/restore.
|
||||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||||
const unsigned Reg = CSRegs[i];
|
const unsigned Reg = CSRegs[i];
|
||||||
@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
|
|
||||||
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
||||||
// won't include them.
|
// won't include them.
|
||||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
||||||
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
|
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
|
||||||
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
|
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
|
||||||
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
|
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
|
||||||
|
25
test/CodeGen/AArch64/chkstk.ll
Normal file
25
test/CodeGen/AArch64/chkstk.ll
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
|
||||||
|
; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
|
||||||
|
|
||||||
|
; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \
|
||||||
|
; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
|
||||||
|
|
||||||
|
define void @check_watermark() {
|
||||||
|
entry:
|
||||||
|
%buffer = alloca [4096 x i8], align 1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-DEFAULT-CODE-MODEL: check_watermark:
|
||||||
|
; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp
|
||||||
|
; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100
|
||||||
|
; CHECK-DEFAULT-CODE-MODEL: bl __chkstk
|
||||||
|
; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4
|
||||||
|
|
||||||
|
; CHECK-LARGE-CODE-MODEL: check_watermark:
|
||||||
|
; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp
|
||||||
|
; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100
|
||||||
|
; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk
|
||||||
|
; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk
|
||||||
|
; CHECK-LARGE-CODE-MODEL: blr x16
|
||||||
|
; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4
|
Loading…
Reference in New Issue
Block a user