diff --git a/docs/SegmentedStacks.rst b/docs/SegmentedStacks.rst
index e44ce42313c..c0bf32b3f92 100644
--- a/docs/SegmentedStacks.rst
+++ b/docs/SegmentedStacks.rst
@@ -13,9 +13,8 @@ monolithic chunk (of some worst case size) at thread initialization. This is
 done by allocating stack blocks (henceforth called *stacklets*) and linking
 them into a doubly linked list. The function prologue is responsible for
 checking if the current stacklet has enough space for the function to execute;
 and if not,
-call into the libgcc runtime to allocate more stack space. When using ``llc``,
-segmented stacks can be enabled by adding ``-segmented-stacks`` to the command
-line.
+calling into the libgcc runtime to allocate more stack space. Segmented stacks
+are enabled with the ``"split-stack"`` attribute on LLVM functions.
 
 The runtime functionality is `already there in libgcc <http://gcc.gnu.org/wiki/SplitStacks>`_.
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 02a4bb5537f..0fe9e141b5f 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -189,11 +189,6 @@ EnablePIE("enable-pie",
           cl::desc("Assume the creation of a position independent executable."),
           cl::init(false));
 
-cl::opt<bool>
-SegmentedStacks("segmented-stacks",
-                cl::desc("Use segmented stacks if possible."),
-                cl::init(false));
-
 cl::opt<bool>
 UseInitArray("use-init-array",
              cl::desc("Use .init_array instead of .ctors."),
@@ -229,7 +224,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
   Options.StackAlignmentOverride = OverrideStackAlignment;
   Options.TrapFuncName = TrapFuncName;
   Options.PositionIndependentExecutable = EnablePIE;
-  Options.EnableSegmentedStacks = SegmentedStacks;
   Options.UseInitArray = UseInitArray;
   return Options;
 }
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 652d63dbbf8..a55d436848f 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -259,6 +259,9 @@ public:
     return MBBNumbering[N];
   }
 
+  /// Should we be emitting segmented stack code for this function?
+  bool shouldSplitStack();
+
   /// getNumBlockIDs - Return the number of MBB ID's allocated.
   ///
   unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 1f873439c65..122324fbb07 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -49,7 +49,7 @@ namespace llvm {
           JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false),
           DisableTailCalls(false), StackAlignmentOverride(0),
           EnableFastISel(false), PositionIndependentExecutable(false),
-          EnableSegmentedStacks(false), UseInitArray(false),
+          UseInitArray(false),
           DisableIntegratedAS(false), CompressDebugSections(false),
           TrapFuncName(""), FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard) {}
@@ -152,8 +152,6 @@ namespace llvm {
     /// if the relocation model is anything other than PIC.
     unsigned PositionIndependentExecutable : 1;
 
-    unsigned EnableSegmentedStacks : 1;
-
     /// UseInitArray - Use .init_array instead of .ctors for static
     /// constructors.
     unsigned UseInitArray : 1;
@@ -217,7 +215,6 @@ inline bool operator==(const TargetOptions &LHS,
              ARE_EQUAL(StackAlignmentOverride) &&
              ARE_EQUAL(EnableFastISel) &&
              ARE_EQUAL(PositionIndependentExecutable) &&
-             ARE_EQUAL(EnableSegmentedStacks) &&
              ARE_EQUAL(UseInitArray) &&
              ARE_EQUAL(TrapFuncName) &&
              ARE_EQUAL(FloatABIType) &&
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 4c5b767b05a..013eef71d73 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -123,6 +123,11 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
   return JumpTableInfo;
 }
 
+/// Should we be emitting segmented stack code for this function?
+bool MachineFunction::shouldSplitStack() {
+  return getFunction()->hasFnAttribute("split-stack");
+}
+
 /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
 /// recomputes them. This guarantees that the MBB numbers are sequential,
 /// dense, and match the ordering of the blocks within the function. If a
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 1f51ce030c1..6b23b52ec5f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -680,7 +680,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
   // we've been asked for it. This, when linked with a runtime with support
   // for segmented stacks (libgcc is one), will result in allocating stack
   // space in small chunks instead of one large contiguous block.
-  if (Fn.getTarget().Options.EnableSegmentedStacks)
+  if (Fn.shouldSplitStack())
     TFI.adjustForSegmentedStacks(Fn);
 
   // Emit additional code that is required to explicitly handle the stack in
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 51d08998817..7fe143bd5d6 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -140,7 +140,6 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
   Options.StackAlignmentOverride = options.StackAlignmentOverride;
   Options.TrapFuncName = options.TrapFuncName;
   Options.PositionIndependentExecutable = options.PositionIndependentExecutable;
-  Options.EnableSegmentedStacks = options.EnableSegmentedStacks;
   Options.UseInitArray = options.UseInitArray;
 }
 
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index f0ad4d12d00..0a2f8eab476 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -446,7 +446,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
       !MFI->adjustsStack() && // No calls.
       !IsWin64 && // Win64 has no Red Zone
       !usesTheStack(MF) && // Don't push and pop.
-      !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
+      !MF.shouldSplitStack()) { // Regular stack
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bc2c4e9f66e..bca8cae5f67 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -635,15 +635,8 @@ void X86TargetLowering::resetOperationActions() {
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
-  if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho())
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Custom);
-  else if (TM.Options.EnableSegmentedStacks)
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Custom);
-  else
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
-                       MVT::i64 : MVT::i32, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+                     MVT::i64 : MVT::i32, Custom);
 
   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
@@ -11102,13 +11095,50 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                            SelectionDAG &DAG) const {
-  assert((Subtarget->isOSWindows() ||
-          getTargetMachine().Options.EnableSegmentedStacks) &&
-         "This should be used only on Windows targets or when segmented stacks "
-         "are being used");
-  assert(!Subtarget->isTargetMacho() && "Not implemented");
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool SplitStack = MF.shouldSplitStack();
+  bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) ||
+               SplitStack;
   SDLoc dl(Op);
 
+  if (!Lower) {
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    SDNode* Node = Op.getNode();
+
+    unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+        " not tell us which reg is the stack pointer!");
+    EVT VT = Node->getValueType(0);
+    SDValue Tmp1 = SDValue(Node, 0);
+    SDValue Tmp2 = SDValue(Node, 1);
+    SDValue Tmp3 = Node->getOperand(2);
+    SDValue Chain = Tmp1.getOperand(0);
+
+    // Chain the dynamic stack allocation so that it doesn't modify the stack
+    // pointer when other instructions are using the stack.
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
+                                 SDLoc(Node));
+
+    SDValue Size = Tmp2.getOperand(1);
+    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+    Chain = SP.getValue(1);
+    unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+    const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+    unsigned StackAlign = TFI.getStackAlignment();
+    Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+    if (Align > StackAlign)
+      Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+                         DAG.getConstant(-(uint64_t)Align, VT));
+    Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+    Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                              DAG.getIntPtrConstant(0, true), SDValue(),
+                              SDLoc(Node));
+
+    SDValue Ops[2] = { Tmp1, Tmp2 };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+
   // Get the inputs.
   SDValue Chain = Op.getOperand(0);
   SDValue Size = Op.getOperand(1);
@@ -11118,8 +11148,7 @@
   bool Is64Bit = Subtarget->is64Bit();
   EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
 
-  if (getTargetMachine().Options.EnableSegmentedStacks) {
-    MachineFunction &MF = DAG.getMachineFunction();
+  if (SplitStack) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
 
     if (Is64Bit) {
@@ -15796,7 +15825,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
 
-  assert(getTargetMachine().Options.EnableSegmentedStacks);
+  assert(MF->shouldSplitStack());
 
   unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
   unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index b0dc467cfb5..e866b4e124d 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -78,3 +78,5 @@ define void @test_basic() {
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
index 13b5bcf2adb..77c62de0cfd 100644
--- a/test/CodeGen/ARM/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
   %mem = alloca i32, i32 %l
   call void @dummy_use (i32* %mem, i32 %l)
   %terminate = icmp eq i32 %l, 0
@@ -60,3 +60,5 @@ false:
 
 ; ARM-android: pop {r4, r5}
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll
index 5eff63303bc..a7804b900a5 100644
--- a/test/CodeGen/ARM/segmented-stacks.ll
+++ b/test/CodeGen/ARM/segmented-stacks.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
 
 ; We used to crash with filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -54,7 +54,7 @@ define void @test_basic() {
 
 }
 
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
   %addend = load i32 * %closure
   %result = add i32 %other, %addend
   ret i32 %result
@@ -99,7 +99,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
 
 }
 
-define void @test_large() {
+define void @test_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -144,7 +144,7 @@ define void @test_large() {
 
 }
 
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -189,7 +189,7 @@ define fastcc void @test_fastcc() {
 
 }
 
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -233,3 +233,5 @@ define fastcc void @test_fastcc_large() {
 
 ; ARM-android: pop {r4, r5}
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb/segmented-stacks-dynamic.ll b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
index 067c07b689e..5d51f4052c1 100644
--- a/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
   %mem = alloca i32, i32 %l
   call void @dummy_use (i32* %mem, i32 %l)
   %terminate = icmp eq i32 %l, 0
@@ -61,3 +61,5 @@ false:
 
 ; Thumb-android: pop {r4, r5}
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll
index 5649b0088df..89043ec11dc 100644
--- a/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/test/CodeGen/Thumb/segmented-stacks.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -54,7 +54,7 @@ define void @test_basic() {
 
 }
 
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
   %addend = load i32 * %closure
   %result = add i32 %other, %addend
   ret i32 %result
@@ -101,7 +101,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
 
 }
 
-define void @test_large() {
+define void @test_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -150,7 +150,7 @@ define void @test_large() {
 
 }
 
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -197,7 +197,7 @@ define fastcc void @test_fastcc() {
 
 }
 
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -245,3 +245,5 @@ define fastcc void @test_fastcc_large() {
 
 ; Thumb-linux: pop {r4, r5}
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb2/segmented-stacks.ll b/test/CodeGen/Thumb2/segmented-stacks.ll
index 602fc84e254..38bf91564eb 100644
--- a/test/CodeGen/Thumb2/segmented-stacks.ll
+++ b/test/CodeGen/Thumb2/segmented-stacks.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -30,3 +30,5 @@ define void @test_basic() {
 
 ; Thumb-android: pop {r4, r5}
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index e17076215d5..b82be41b8cb 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
   %mem = alloca i32, i32 %l
   call void @dummy_use (i32* %mem, i32 %l)
   %terminate = icmp eq i32 %l, 0
@@ -62,3 +62,5 @@ false:
 
 ; X64: movq %rax, %rdi
 }
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index c02152bb638..8089f205684 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
 
 ; We used to crash with filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj
 
-; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
 ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
-; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd 2> %t.log
 ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
 
 ; X64-Solaris: Segmented stacks not supported on this platform
@@ -26,7 +26,7 @@
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define void @test_basic() {
+define void @test_basic() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -104,7 +104,7 @@
 
 }
 
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
   %addend = load i32 * %closure
   %result = add i32 %other, %addend
   ret i32 %result
@@ -177,7 +177,7 @@
 
 }
 
-define void @test_large() {
+define void @test_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -249,7 +249,7 @@
 
 }
 
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
   %mem = alloca i32, i32 10
   call void @dummy_use (i32* %mem, i32 10)
   ret void
@@ -327,7 +327,7 @@
 
 }
 
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 0)
   ret void
@@ -412,7 +412,7 @@
 
 }
 
-define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 {
   %mem = alloca i32, i32 10000
   call void @dummy_use (i32* %mem, i32 %a)
   ret void
@@ -434,3 +434,5 @@
 
 ; X32-Darwin-NEXT: ret
 }
+
+attributes #0 = { "split-stack" }
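
For reference, a minimal standalone sketch of the new usage, distilled from the tests above (the file name demo.ll is assumed here purely for illustration):

; demo.ll: segmented stacks are now requested per function through the
; "split-stack" attribute; llc needs no special flag.
define void @test_basic() #0 {
  %mem = alloca i32, i32 10
  call void @dummy_use(i32* %mem, i32 10)
  ret void
}

; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

attributes #0 = { "split-stack" }

Compiling this with, e.g., llc < demo.ll -mtriple=x86_64-linux -verify-machineinstrs emits the split-stack prologue for @test_basic, while a function without the attribute keeps an ordinary contiguous stack frame; since the prologue adjustment is now gated on MachineFunction::shouldSplitStack(), split-stack and regular functions can coexist in one module, which the old all-or-nothing -segmented-stacks flag did not allow.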