
Revert "Support -fstack-clash-protection for x86"

This reverts commit 0fd51a4554f5f4f90342f40afd35b077f6d88213.

Failures:

http://lab.llvm.org:8011/builders/llvm-clang-win-x-armv7l/builds/4354
Author: serge-sans-paille
Date: 2020-02-09 10:06:31 +01:00
Parent: 9120fd7c93
Commit: 85fea1133e
17 changed files with 21 additions and 662 deletions


@ -84,10 +84,6 @@ Changes to the X86 Target
During this release ...
* Functions with the probe-stack attribute set to "inline-asm" are now protected
against stack clash without the need of a third-party probing function and
with limited impact on performance.
Changes to the AMDGPU Target
-----------------------------
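
The X86 release note above refers to the "probe-stack" function attribute. As a minimal, hypothetical sketch (the function itself is invented for illustration; the attribute value is the one exercised by the deleted tests at the end of this diff), a function opts in to inline stack probing at the IR level like this:

; Hypothetical example, not part of this commit.
target triple = "x86_64-unknown-linux-gnu"

define void @probed_frame() "probe-stack"="inline-asm" {
  %buf = alloca i8, i64 20000, align 16   ; roughly five pages of locals, probed page by page in the prologue
  store volatile i8 1, i8* %buf
  ret void
}

With the attribute present, the prologue code removed below emits either an unrolled sequence of page-sized sub-and-touch pairs or a probing loop, depending on the frame size.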


@ -1727,10 +1727,6 @@ public:
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
return "";
}


@ -162,13 +162,14 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
bool UseStackProbe =
!STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
return false;
if (InsideFrameSequence)
return false;


@ -17,7 +17,6 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -33,12 +32,6 @@
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
#define DEBUG_TYPE "x86-fl"
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
"Number of extra stack probes generated in prologue");
using namespace llvm;
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
@ -264,27 +257,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
uint64_t Chunk = (1LL << 31) - 1;
MachineFunction &MF = *MBB.getParent();
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
// It's ok not to take large chunks into account when probing, as the
// allocation is split into smaller chunks anyway.
if (EmitInlineStackProbe && !InEpilogue) {
// Stack probing may involve looping, but control flow generation is
// disallowed at this point. Rely on later processing through
// `inlineStackProbe`.
MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
// Encode the static offset as a metadata attached to the stub.
LLVMContext &Context = MF.getFunction().getContext();
MachineInstrBuilder(MF, Stub).addMetadata(
MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get(
IntegerType::get(Context, 64), Offset))}));
return;
} else if (Offset > Chunk) {
if (Offset > Chunk) {
// Rather than emit a long series of instructions for large offsets,
// load the offset into a register and do one sub/add
unsigned Reg = 0;
@ -408,8 +381,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
: getADDriOpcode(Uses64BitFramePtr, AbsOffset);
unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
: getADDriOpcode(Uses64BitFramePtr, AbsOffset);
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(AbsOffset);
@ -555,170 +528,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
const DebugLoc &DL,
bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
else
emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}
void X86FrameLowering::emitStackProbeInlineGeneric(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
MachineInstr &CallToInline = *std::prev(MBBI);
assert(CallToInline.getOperand(1).isMetadata() &&
"no metadata attached to that probe");
uint64_t Offset =
cast<ConstantInt>(
cast<ConstantAsMetadata>(
cast<MDTuple>(CallToInline.getOperand(1).getMetadata())
->getOperand(0))
->getValue())
->getZExtValue();
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
"different expansion expected for CoreCLR 64 bit");
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
uint64_t ProbeChunk = StackProbeSize * 8;
// Synthesize a loop or unroll it, depending on the number of iterations.
if (Offset > ProbeChunk) {
emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
} else {
emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
}
}
void X86FrameLowering::emitStackProbeInlineGenericBlock(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
uint64_t Offset) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
uint64_t CurrentOffset = 0;
// 0 Thanks to return address being saved on the stack
uint64_t CurrentProbeOffset = 0;
// For the first N - 1 pages, just probe. I tried to take advantage of
// natural probes, but it implies much more logic and there were very few
// interesting natural probes to interleave.
while (CurrentOffset + StackProbeSize < Offset) {
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
.setMIFlag(MachineInstr::FrameSetup),
StackPtr, false, 0)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
NumFrameExtraProbe++;
CurrentOffset += StackProbeSize;
CurrentProbeOffset += StackProbeSize;
}
uint64_t ChunkSize = Offset - CurrentOffset;
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(ChunkSize)
.setMIFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
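
As a worked check of the unrolled variant above, take the constants from the deleted medium-allocation tests at the end of this diff and assume the default 4096-byte probe size (an assumption; the actual value comes from getStackProbeSize):

  Offset = 7880
  iteration 1: 0 + 4096 < 7880      ->  subtract 4096 from %rsp, then movq $0, (%rsp) as the probe
  iteration 2: 4096 + 4096 >= 7880  ->  leave the while loop
  final chunk: 7880 - 4096 = 3784   ->  subtract 3784 from %rsp, with no trailing probe

which matches the subq $4096 / movq $0, (%rsp) / subq $3784 sequence those tests expect.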
void X86FrameLowering::emitStackProbeInlineGenericLoop(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
uint64_t Offset) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
// Synthesize a loop
NumFrameLoopProbe++;
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++MBB.getIterator();
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
unsigned FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FinalStackPtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
// save loop bound
{
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr)
.addReg(FinalStackPtr)
.addImm(Offset / StackProbeSize * StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
}
// allocate a page
{
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
}
// touch the page
addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
.setMIFlag(MachineInstr::FrameSetup),
StackPtr, false, 0)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
// cmp with stack pointer bound
BuildMI(testMBB, DL, TII.get(IsLP64 ? X86::CMP64rr : X86::CMP32rr))
.addReg(StackPtr)
.addReg(FinalStackPtr)
.setMIFlag(MachineInstr::FrameSetup);
// jump
BuildMI(testMBB, DL, TII.get(X86::JCC_1))
.addMBB(testMBB)
.addImm(X86::COND_NE)
.setMIFlag(MachineInstr::FrameSetup);
testMBB->addSuccessor(testMBB);
testMBB->addSuccessor(tailMBB);
testMBB->addLiveIn(FinalStackPtr);
// allocate a block and touch it
tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
MBB.addSuccessor(testMBB);
if (Offset % StackProbeSize) {
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(Offset % StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
}
}
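
Similarly, for the loop variant, the constants from the deleted large-allocation test at the end of this diff (again assuming the default 4096-byte probe size) work out as:

  Offset = 71880
  loop bound   = 71880 / 4096 * 4096 = 17 * 4096 = 69632  ->  %r11 = %rsp - 69632; the loop keeps
                                                               subtracting 4096 and probing until %rsp == %r11
  tail residue = 71880 % 4096 = 2248                      ->  one extra subq $2248 in tailMBB

which matches the subq $69632, %r11, the page-sized probe loop, and the trailing subq $2248, %rsp that test expects.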
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
assert(STI.is64Bit() && "different expansion needed for 32 bit");
assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
const TargetInstrInfo &TII = *STI.getInstrInfo();
@ -1012,13 +821,13 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
}
}
MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
void X86FrameLowering::emitStackProbeInlineStub(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
assert(InProlog && "ChkStkStub called outside prolog!");
return BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__chkstk_stub");
}
@ -1205,8 +1014,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
const bool EmitStackProbeCall =
STI.getTargetLowering()->hasStackProbeSymbol(MF);
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
@ -1224,10 +1032,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
if (has128ByteRedZone(MF) &&
!TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
!EmitStackProbeCall && // No stack probes.
!UseStackProbe && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@ -1428,7 +1237,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
uint64_t AlignedNumBytes = NumBytes;
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
assert(!X86FI->getUsesRedZone() &&
"The Red Zone is not accounted for in stack probes");


@ -195,33 +195,11 @@ private:
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;
void emitStackProbeInlineWindowsCoreCLR64(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
bool InProlog) const;
void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;
void emitStackProbeInlineGenericBlock(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
uint64_t Offset) const;
void emitStackProbeInlineGenericLoop(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
uint64_t Offset) const;
/// Emit a stub to later inline the target stack probe.
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
bool InProlog) const;
void emitStackProbeInlineStub(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,


@ -23111,9 +23111,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbeCall = hasStackProbeSymbol(MF);
bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
SplitStack || EmitStackProbeCall;
SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
@ -23137,21 +23137,11 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Align StackAlign(TFI.getStackAlignment());
if (hasInlineStackProbe(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
}
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Alignment && Alignment > StackAlign)
Result =
DAG.getNode(ISD::AND, dl, VT, Result,
@ -29883,7 +29873,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
NODE_NAME_CASE(PROBED_ALLOCA)
NODE_NAME_CASE(RDRAND)
NODE_NAME_CASE(RDSEED)
NODE_NAME_CASE(RDPKRU)
@ -31151,97 +31140,6 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
return SinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, testMBB);
MF->insert(MBBIter, blockMBB);
MF->insert(MBBIter, tailMBB);
unsigned sizeVReg = MI.getOperand(1).getReg();
const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
// test rsp size
BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
.addReg(sizeVReg)
.addMBB(BB)
.addReg(tmpSizeVReg2)
.addMBB(blockMBB);
BuildMI(testMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri))
.addReg(tmpSizeVReg)
.addImm(ProbeSize);
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
.addImm(X86::COND_L);
testMBB->addSuccessor(blockMBB);
testMBB->addSuccessor(tailMBB);
// allocate a block and touch it
BuildMI(blockMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
tmpSizeVReg2)
.addReg(tmpSizeVReg)
.addImm(ProbeSize);
BuildMI(blockMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
physSPReg)
.addReg(physSPReg)
.addImm(ProbeSize);
const unsigned MovMIOpc =
TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
.addImm(0);
BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
blockMBB->addSuccessor(testMBB);
// allocate the tail and continue
BuildMI(tailMBB, DL,
TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr),
physSPReg)
.addReg(physSPReg)
.addReg(tmpSizeVReg);
BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(physSPReg);
tailMBB->splice(tailMBB->end(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
tailMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(testMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
// And we're done.
return tailMBB;
}
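
For a runtime-sized allocation there is no constant to fold, so the lowering above builds the same kind of loop around the requested size (shape taken from the deleted variable-size test at the end of this diff, again assuming a 4096-byte probe size):

  testMBB:   compare the remaining size against 4096; if smaller, jump to tailMBB
  blockMBB:  size -= 4096; %rsp -= 4096; movq $0, (%rsp)   ; probe the newly allocated page
  tailMBB:   %rsp -= remaining size; the result register is a copy of %rsp

This mirrors the cmpq $4096 / jl / subq $4096 loop and the final subq %rax, %rsp in that test.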
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
@ -32402,9 +32300,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::PROBED_ALLOCA_32:
case X86::PROBED_ALLOCA_64:
return EmitLoweredProbedAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@ -47664,35 +47559,10 @@ bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
/// Returns true if stack probing through a function call is requested.
bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
return !getStackProbeSymbolName(MF).empty();
}
/// Returns true if stack probing through inline assembly is requested.
bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
// No inline stack probe for Windows, they have their own mechanism.
if (Subtarget.isOSWindows() ||
MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
return false;
// If the function specifically requests inline stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
"inline-asm";
return false;
}
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
// Inline Stack probes disable stack probe call
if (hasInlineStackProbe(MF))
return "";
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();


@ -537,10 +537,6 @@ namespace llvm {
// falls back to heap allocation if not.
SEG_ALLOCA,
// For allocating stack space when using stack clash protector.
// Allocation is performed by block, and each block is probed.
PROBED_ALLOCA,
// Memory barriers.
MEMBARRIER,
MFENCE,
@ -1228,8 +1224,6 @@ namespace llvm {
bool supportSwiftError() const override;
bool hasStackProbeSymbol(MachineFunction &MF) const override;
bool hasInlineStackProbe(MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
unsigned getStackProbeSize(MachineFunction &MF) const;
@ -1454,9 +1448,6 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const;


@ -111,23 +111,6 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
// To protect against stack clash, dynamic allocation should perform a memory
// probe at each page.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca with probing",
[(set GR32:$dst,
(X86ProbedAlloca GR32:$size))]>,
Requires<[NotLP64]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
"# variable sized alloca with probing",
[(set GR64:$dst,
(X86ProbedAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
}
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows


@ -121,8 +121,6 @@ def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@ -294,9 +292,6 @@ def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
[SDNPHasChain]>;
def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
[SDNPHasChain]>;
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;


@ -1,44 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo(i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: leaq 15(,%rax,4), %rax
; CHECK-NEXT: andq $-16, %rax
; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
; CHECK-NEXT: jl .LBB0_3
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subq $4096, %rax # imm = 0x1000
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
; CHECK-NEXT: jge .LBB0_2
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: subq %rax, %rsp
; CHECK-NEXT: movq %rsp, %rax
; CHECK-NEXT: movl $1, 4792(%rax)
; CHECK-NEXT: movl (%rax), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
%a = alloca i32, i32 %n, align 16
%b = getelementptr inbounds i32, i32* %a, i64 1198
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,38 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo() local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsp, %r11
; CHECK-NEXT: subq $69632, %r11 # imm = 0x11000
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: cmpq %r11, %rsp
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT:# %bb.2:
; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8
; CHECK-NEXT: .cfi_def_cfa_offset 71888
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl $1, 28664(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $71880, %rsp # imm = 0x118C8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%a = alloca i32, i64 18000, align 16
%b0 = getelementptr inbounds i32, i32* %a, i64 98
%b1 = getelementptr inbounds i32, i32* %a, i64 7198
store volatile i32 1, i32* %b0
store volatile i32 1, i32* %b1
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,32 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo() local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT .cfi_def_cfa_offset 5888
; CHECK-NEXT movl $1, 2088(%rsp)
; CHECK-NEXT subq $1784, %rsp # imm = 0x6F8
; CHECK-NEXT movl $2, 672(%rsp)
; CHECK-NEXT movl 1872(%rsp), %eax
; CHECK-NEXT addq $5880, %rsp # imm = 0x16F8
; CHECK-NEXT .cfi_def_cfa_offset 8
; CHECK-NEXT retq
%a = alloca i32, i64 1000, align 16
%b = alloca i32, i64 500, align 16
%a0 = getelementptr inbounds i32, i32* %a, i64 500
%b0 = getelementptr inbounds i32, i32* %b, i64 200
store volatile i32 1, i32* %a0
store volatile i32 2, i32* %b0
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,33 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo() local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl $1, 4664(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%a = alloca i32, i64 2000, align 16
%b0 = getelementptr inbounds i32, i32* %a, i64 98
%b1 = getelementptr inbounds i32, i32* %a, i64 1198
store i32 1, i32* %b0
store i32 1, i32* %b1
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,30 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo() local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, 672(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%a = alloca i32, i64 2000, align 16
%b = getelementptr inbounds i32, i32* %a, i64 200
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,27 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo(i64 %i) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%a = alloca i32, i32 2000, align 16
%b = getelementptr inbounds i32, i32* %a, i64 %i
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,25 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo() local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $280, %rsp # imm = 0x118
; CHECK-NEXT: .cfi_def_cfa_offset 288
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $280, %rsp # imm = 0x118
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%a = alloca i32, i64 100, align 16
%b = getelementptr inbounds i32, i32* %a, i64 98
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}


@ -1,31 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);
define void @foo() local_unnamed_addr #0 {
;CHECK-LABEL: foo:
;CHECK: # %bb.0:
;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; it's important that we don't use the call as a probe here
;CHECK-NEXT: movq $0, (%rsp)
;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48
;CHECK-NEXT: .cfi_def_cfa_offset 8016
;CHECK-NEXT: movq %rsp, %rdi
;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40
;CHECK-NEXT: xorl %esi, %esi
;CHECK-NEXT: callq memset
;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48
;CHECK-NEXT: .cfi_def_cfa_offset 8
;CHECK-NEXT: retq
%a = alloca i8, i64 8000, align 16
call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)
ret void
}
attributes #0 = {"probe-stack"="inline-asm"}