
IR/AArch64/X86: add "swifttailcc" calling convention.

Swift's new concurrency features are going to require guaranteed tail calls so
that they don't consume excessive amounts of stack space. This would normally
mean "tailcc", but there are also Swift-specific ABI desires that don't
naturally go along with "tailcc" so this adds another calling convention that's
the combination of "swiftcc" and "tailcc".

Support is added for AArch64 and X86 for now.
Tim Northover 2020-11-19 12:32:50 +00:00
parent 2320a8cd94
commit fc5daa6083
34 changed files with 893 additions and 107 deletions
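For orientation, here is a minimal sketch (hypothetical names) of what the new
convention looks like at the IR level, in the spirit of the tests added below:

declare swifttailcc void @callee()

define swifttailcc void @caller() {
  ; With swifttailcc the tail call is guaranteed: the callee pops its own
  ; argument area, so the caller can simply branch to it.
  tail call swifttailcc void @callee()
  ret void
}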


@ -795,6 +795,8 @@ function. The operand fields are:
* ``swiftcc`` : code 16
* ``cxx_fast_tlscc``: code 17
* ``tailcc`` : code 18
* ``cfguard_checkcc`` : code 19
* ``swifttailcc`` : code 20
* ``x86_stdcallcc``: code 64
* ``x86_fastcallcc``: code 65
* ``arm_apcscc``: code 66


@ -2064,11 +2064,12 @@ Tail call optimization
----------------------
Tail call optimization, callee reusing the stack of the caller, is currently
supported on x86/x86-64, PowerPC, and WebAssembly. It is performed on x86/x86-64
and PowerPC if:
supported on x86/x86-64, PowerPC, AArch64, and WebAssembly. It is performed on
x86/x86-64, PowerPC, and AArch64 if:
* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``.
calling convention), ``cc 11`` (HiPE calling convention), ``tailcc``, or
``swifttailcc``.
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
@ -2102,6 +2103,10 @@ WebAssembly constraints:
* The caller and callee's return types must match. The caller cannot
be void unless the callee is, too.
AArch64 constraints:
* No variable argument lists are used.
Example:
Call as ``llc -tailcallopt test.ll``.
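To make the recipe above concrete, a minimal test.ll sketch (names are
illustrative) where ``llc -tailcallopt`` guarantees the optimization for
``fastcc``:

declare fastcc void @callee()

define fastcc void @caller() {
  ; Caller and callee share a TCO-capable convention and the call is in
  ; tail position, so -tailcallopt turns this into a direct branch.
  tail call fastcc void @callee()
  ret void
}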


@ -432,10 +432,6 @@ added in the future:
- On X86-64 the callee preserves all general purpose registers, except for
RDI and RAX.
"``swiftcc``" - This calling convention is used for Swift language.
- On X86-64 RCX and R8 are available for additional integer returns, and
XMM2 and XMM3 are available for additional FP/vector returns.
- On iOS platforms, we use AAPCS-VFP calling convention.
"``tailcc``" - Tail callable calling convention
This calling convention ensures that calls in tail position will always be
tail call optimized. This calling convention is equivalent to fastcc,
@ -444,6 +440,14 @@ added in the future:
the GHC or the HiPE convention is used. <CodeGenerator.html#id80>`_ This
calling convention does not support varargs and requires the prototype of
all callees to exactly match the prototype of the function definition.
"``swiftcc``" - This calling convention is used for Swift language.
- On X86-64 RCX and R8 are available for additional integer returns, and
XMM2 and XMM3 are available for additional FP/vector returns.
- On iOS platforms, we use AAPCS-VFP calling convention.
"``swifttailcc``"
This calling convention is like ``swiftcc`` in most respects, but also the
callee pops the argument area of the stack so that mandatory tail calls are
possible as in ``tailcc``.
"``cfguard_checkcc``" - Windows Control Flow Guard (Check mechanism)
This calling convention is used for the Control Flow Guard check function,
calls to which can be inserted before indirect calls to check that the call
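A sketch of the ``swifttailcc`` property described above (function names are
illustrative): because the callee pops the argument area, a tail call can be
mandatory even when arguments travel on the stack:

declare swifttailcc void @takes_stack_arg([8 x i64], i64)

define swifttailcc void @mandatory_tail([8 x i64] %regs, i64 %v) {
  ; musttail is honoured although %v is passed on the stack; the callee
  ; deallocates that argument area itself when it returns.
  musttail call swifttailcc void @takes_stack_arg([8 x i64] %regs, i64 %v)
  ret void
}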
@ -12386,7 +12390,7 @@ context of a Swift execution.
Semantics:
""""""""""
If the function has a ``swiftasync`` parameter, that argument will initially
If the caller has a ``swiftasync`` parameter, that argument will initially
be stored at the returned address. If not, it will be initialized to null.
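A sketch of those semantics (hypothetical function name), using the intrinsic
declaration from the tests below:

declare i8** @llvm.swift.async.context.addr()

define swifttailcc i8* @read_ctx(i8* swiftasync %ctx) {
  ; The slot returned by the intrinsic initially holds %ctx because this
  ; function has a swiftasync parameter; without one it would hold null.
  %slot = call i8** @llvm.swift.async.context.addr()
  %val = load i8*, i8** %slot
  ret i8* %val
}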
'``llvm.localescape``' and '``llvm.localrecover``' Intrinsics


@ -156,6 +156,7 @@ enum Kind {
kw_webkit_jscc,
kw_anyregcc,
kw_swiftcc,
kw_swifttailcc,
kw_preserve_mostcc,
kw_preserve_allcc,
kw_ghccc,


@ -86,6 +86,11 @@ namespace CallingConv {
/// and has no return value. All register values are preserved.
CFGuard_Check = 19,
/// SwiftTail - This follows the Swift calling convention in how arguments
/// are passed but guarantees tail calls will be made by making the callee
/// clean up their stack.
SwiftTail = 20,
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
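Since the enum value is IR-visible, the numbered and named spellings below
should denote the same convention (a small sketch; @f and @g are placeholders):

declare cc 20 void @f()        ; numbered form, parses to CallingConv::SwiftTail
declare swifttailcc void @g()  ; equivalent named keyword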


@ -609,6 +609,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x86_regcallcc);
KEYWORD(webkit_jscc);
KEYWORD(swiftcc);
KEYWORD(swifttailcc);
KEYWORD(anyregcc);
KEYWORD(preserve_mostcc);
KEYWORD(preserve_allcc);


@ -2124,6 +2124,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'preserve_allcc'
/// ::= 'ghccc'
/// ::= 'swiftcc'
/// ::= 'swifttailcc'
/// ::= 'x86_intrcc'
/// ::= 'hhvmcc'
/// ::= 'hhvm_ccc'
@ -2174,6 +2175,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break;
case lltok::kw_ghccc: CC = CallingConv::GHC; break;
case lltok::kw_swiftcc: CC = CallingConv::Swift; break;
case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break;
case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break;
case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;


@ -511,9 +511,10 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
// not profitable. Also, if the callee is a special function (e.g.
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
((!TM.Options.GuaranteedTailCallOpt &&
Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
Call.getCallingConv() != CallingConv::Tail &&
Call.getCallingConv() != CallingConv::SwiftTail) ||
!isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
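The effect of the relaxed check, sketched in IR (this mirrors the noret test
added later in this commit): a guaranteed-TCO convention no longer needs a ret
if the block ends in unreachable.

declare swifttailcc void @does_not_return()

define swifttailcc void @noret() {
  ; No ret after the call: this is still a valid tail-call position because
  ; the convention guarantees TCO and the terminator is unreachable.
  tail call swifttailcc void @does_not_return()
  unreachable
}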


@ -388,6 +388,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::SPIR_FUNC: Out << "spir_func"; break;
case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break;
case CallingConv::Swift: Out << "swiftcc"; break;
case CallingConv::SwiftTail: Out << "swifttailcc"; break;
case CallingConv::X86_INTR: Out << "x86_intrcc"; break;
case CallingConv::HHVM: Out << "hhvmcc"; break;
case CallingConv::HHVM_C: Out << "hhvm_ccc"; break;


@ -421,6 +421,9 @@ def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, LR, FP)>;
def CSR_AArch64_AAPCS_SwiftTail
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>;
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@ -473,6 +476,9 @@ def CSR_Darwin_AArch64_AAPCS_ThisReturn
def CSR_Darwin_AArch64_AAPCS_SwiftError
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
def CSR_Darwin_AArch64_AAPCS_SwiftTail
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
// fast path for calculation, but other registers except X0 (argument/return)
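The subtracted registers are the ones Swift uses for context arguments on
AArch64: x20 carries swiftself and x22 carries swiftasync, so a swifttailcc
function must not treat them as callee-saved. A small IR sketch of the
consequence, mirroring the swifttailcc tests below:

define swifttailcc void @clobber_ctx_regs() {
  ; x20 and x22 may be clobbered freely: no save/restore is emitted for
  ; them in a swifttailcc function.
  call void asm sideeffect "", "~{x20},~{x22}"()
  ret void
}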


@ -193,9 +193,14 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns the argument pop size.
static uint64_t getArgumentPopSize(MachineFunction &MF,
MachineBasicBlock &MBB) {
/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
/// tail call to a function that uses less stack space for arguments) or
/// negative (for a tail call to a function that needs more stack space than us
/// for arguments).
static int64_t getArgumentStackToRestore(MachineFunction &MF,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
bool IsTailCallReturn = false;
if (MBB.end() != MBBI) {
@ -206,7 +211,7 @@ static uint64_t getArgumentPopSize(MachineFunction &MF,
}
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t ArgumentPopSize = 0;
int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
@ -255,7 +260,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
return false;
if (Exit && getArgumentPopSize(MF, *Exit))
if (Exit && getArgumentStackToRestore(MF, *Exit))
return false;
return true;
@ -311,10 +316,10 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) {
if (!IsWin64 || IsFunclet) {
// Only Win64 uses fixed objects, and then only for the function (not
// funclets)
return 0;
return AFI->getTailCallReservedStack();
} else {
if (AFI->getTailCallReservedStack() != 0)
report_fatal_error("cannot generate ABI-changing tail call for Win64");
// Var args are stored here in the primary function.
const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
// To support EH funclets we allocate an UnwindHelp object
@ -887,21 +892,17 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
++MBBI;
}
unsigned NewOpc;
int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
Scale = 8;
break;
case AArch64::STPQi:
NewOpc = AArch64::STPQpre;
Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
@ -914,15 +915,12 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
Scale = 8;
break;
case AArch64::LDPQi:
NewOpc = AArch64::LDPQpost;
Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
@ -941,9 +939,18 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
TypeSize Scale = TypeSize::Fixed(1);
unsigned Width;
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
(void)Success;
assert(Success && "unknown load/store opcode");
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0) {
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(CSStackSizeInc), TII,
InProlog ? MachineInstr::FrameSetup
@ -966,7 +973,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
assert(CSStackSizeInc % Scale == 0);
MIB.addImm(CSStackSizeInc / Scale);
MIB.addImm(CSStackSizeInc / (int)Scale);
MIB.setMIFlags(MBBI->getFlags());
MIB.setMemRefs(MBBI->memoperands());
@ -1667,9 +1674,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB);
// How much of the stack used by incoming arguments this function is expected
// to restore in this particular epilogue.
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
// The stack frame should be like below,
//
@ -1704,7 +1711,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
uint64_t AfterCSRPopSize = ArgumentPopSize;
int64_t AfterCSRPopSize = ArgumentStackToRestore;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// We cannot rely on the local stack size set in emitPrologue if the function
// has funclets, as funclets have different local stack size requirements, and
@ -1741,8 +1748,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0, convert it to a post-index ldp.
if (OffsetOp.getImm() == 0)
// If the offset is 0 and the AfterCSR pop is not actually trying to
// allocate more stack for arguments (in space that an untimely interrupt
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
else {
@ -1913,6 +1922,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
if (AfterCSRPopSize) {
assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
"interrupt may have clobbered");
// Find an insertion point for the first ldp so that it goes before the
// shadow call stack epilog instruction. This ensures that the restore of
// lr from x18 is placed after the restore from sp.
@ -1928,7 +1939,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
StackOffset::getFixed(AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
if (HasWinCFI)
@ -2146,7 +2157,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
AttributeList Attrs = MF.getFunction().getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError));
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@ -2260,6 +2272,7 @@ static void computeCalleeSaveRegisterPairs(
FirstReg = Count - 1;
}
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@ -2369,17 +2382,16 @@ static void computeCalleeSaveRegisterPairs(
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
if (NeedGapToAlignStack && !NeedsWinCFI &&
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
!RPI.isPaired()) {
!RPI.isPaired() && ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
// A stack frame with a gap looks like this, bottom up:
// d9, d8. x21, gap, x20, x19.
// Set extra alignment on the x21 object (the only unpaired register)
// to create the gap above it.
// Set extra alignment on the x21 object to create the gap above it.
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
NeedGapToAlignStack = false;
}
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;


@ -4764,6 +4764,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
@ -5240,8 +5242,9 @@ SDValue AArch64TargetLowering::LowerCallResult(
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return CC == CallingConv::Fast;
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
@ -5251,9 +5254,12 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return canGuaranteeTCO(CC);
return false;
}
}
@ -5305,8 +5311,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return false;
}
if (getTargetMachine().Options.GuaranteedTailCallOpt)
return canGuaranteeTCO(CalleeCC) && CCMatch;
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
@ -5437,7 +5443,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return CallCC == CallingConv::Fast && TailCallOpt;
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
@ -5489,7 +5496,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall)
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
@ -5580,6 +5588,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
@ -5591,7 +5604,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
@ -5848,7 +5861,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
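To make the FPDiff bookkeeping above concrete (illustrative numbers): if this
function received 16 bytes of stack arguments (NumReusableBytes = 16) and tail
calls a callee that needs 32 bytes (NumBytes = 32), then
FPDiff = 16 - 32 = -16. The negative value means the call needs 16 bytes more
than the incoming argument area provides, so TailCallReservedStack is raised
to 16 and the prologue reserves that much space for the function's tail calls
up front.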


@ -2695,6 +2695,38 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::STPXpre:
case AArch64::LDPXpost:
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::Fixed(8);
Width = 8;
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = -1024;
MaxOffset = 1008;
break;
case AArch64::STRXpre:
case AArch64::STRDpre:
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::Fixed(1);
Width = 8;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::Fixed(1);
Width = 16;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
Scale = TypeSize::Fixed(16);
Width = 0;
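These ranges follow from the encodings (a quick sanity check of the
arithmetic): the paired pre/post-indexed forms take a signed 7-bit immediate
scaled by the access size, so the X- and D-register pairs cover
-64*8 = -512 through 63*8 = 504 and the Q-register pairs cover
-64*16 = -1024 through 63*16 = 1008, while the single-register pre/post forms
take an unscaled signed 9-bit immediate, hence -256 through 255.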


@ -54,6 +54,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// callee is expected to pop the args.
unsigned ArgumentStackToRestore = 0;
/// Space just below incoming stack pointer reserved for arguments being
/// passed on the stack during a tail call. This will be the difference
/// between the largest tail call argument space needed in this function and
/// what's already available by reusing space of incoming arguments.
unsigned TailCallReservedStack = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
bool HasStackFrame = false;
@ -180,6 +186,11 @@ public:
ArgumentStackToRestore = bytes;
}
unsigned getTailCallReservedStack() const { return TailCallReservedStack; }
void setTailCallReservedStack(unsigned bytes) {
TailCallReservedStack = bytes;
}
bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
void setStackSizeSVE(uint64_t S) {


@ -100,6 +100,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
@ -134,6 +136,8 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
@ -199,6 +203,8 @@ AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask;
if (CC == CallingConv::SwiftTail)
return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask;
if (CC == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
return CSR_Darwin_AArch64_AAPCS_RegMask;
@ -233,6 +239,11 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask
: CSR_AArch64_AAPCS_SwiftError_RegMask;
if (CC == CallingConv::SwiftTail) {
if (SCS)
report_fatal_error("ShadowCallStack attribute not supported with swifttail");
return CSR_AArch64_AAPCS_SwiftTail_RegMask;
}
if (CC == CallingConv::PreserveMost)
return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
: CSR_AArch64_RT_MostRegs_RegMask;


@ -332,7 +332,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
} // namespace
static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
return CallConv == CallingConv::Fast && TailCallOpt;
return (CallConv == CallingConv::Fast && TailCallOpt) ||
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@ -608,8 +609,9 @@ bool AArch64CallLowering::lowerFormalArguments(
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return CC == CallingConv::Fast;
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
@ -618,9 +620,12 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
case CallingConv::C:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return canGuaranteeTCO(CC);
return false;
}
}
@ -812,8 +817,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
}
// If we have -tailcallopt, then we're done.
if (MF.getTarget().Options.GuaranteedTailCallOpt)
return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
return CalleeCC == CallerF.getCallingConv();
// We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
// Try to find cases where we can do that.
@ -884,7 +889,9 @@ bool AArch64CallLowering::lowerTailCall(
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// True when we're tail calling, but without -tailcallopt.
bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
Info.CallConv != CallingConv::Tail &&
Info.CallConv != CallingConv::SwiftTail;
// TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
// register class. Until we can do that, we should fall back here.
@ -956,6 +963,11 @@ bool AArch64CallLowering::lowerTailCall(
// actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
@ -1003,12 +1015,12 @@ bool AArch64CallLowering::lowerTailCall(
// sequence start and end here.
if (!IsSibCall) {
MIB->getOperand(1).setImm(FPDiff);
CallSeqStart.addImm(NumBytes).addImm(0);
CallSeqStart.addImm(0).addImm(0);
// End the call sequence *before* emitting the call. Normally, we would
// tidy the frame up after the call. However, here, we've laid out the
// parameters so that when SP is reset, they will be in the correct
// location.
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
}
// Now we can add the actual call instruction to the correct basic block.


@ -1996,6 +1996,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::Swift:
case CallingConv::SwiftTail:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())


@ -462,6 +462,7 @@ def RetCC_X86_64 : CallingConv<[
// Handle Swift calls.
CCIfCC<"CallingConv::Swift", CCDelegateTo<RetCC_X86_64_Swift>>,
CCIfCC<"CallingConv::SwiftTail", CCDelegateTo<RetCC_X86_64_Swift>>,
// Handle explicit CC selection
CCIfCC<"CallingConv::Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
@ -517,9 +518,15 @@ def CC_X86_64_C : CallingConv<[
// A SwiftError is passed in R12.
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
// For Swift Calling Convention, pass sret in %rax.
// Pass SwiftAsync in an otherwise callee saved register so that calls to
// normal functions don't need to save it somewhere.
CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[R14]>>>,
// For Swift Calling Conventions, pass sret in %rax.
CCIfCC<"CallingConv::Swift",
CCIfSRet<CCIfType<[i64], CCAssignToReg<[RAX]>>>>,
CCIfCC<"CallingConv::SwiftTail",
CCIfSRet<CCIfType<[i64], CCAssignToReg<[RAX]>>>>,
// Pointers are always passed in full 64-bit registers.
CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
@ -851,6 +858,10 @@ def CC_X86_32_C : CallingConv<[
// The 'nest' parameter, if any, is passed in ECX.
CCIfNest<CCAssignToReg<[ECX]>>,
// On swifttailcc pass swiftself in ECX.
CCIfCC<"CallingConv::SwiftTail",
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[ECX]>>>>,
// The first 3 integer arguments, if marked 'inreg' and if the call is not
// a vararg call, are passed in integer registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
@ -1080,6 +1091,7 @@ def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
def CSR_64_SwiftError : CalleeSavedRegs<(sub CSR_64, R12)>;
def CSR_64_SwiftTail : CalleeSavedRegs<(sub CSR_64, R13, R14)>;
def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>;
def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
@ -1090,6 +1102,7 @@ def CSR_Win64 : CalleeSavedRegs<(add CSR_Win64_NoSSE,
(sequence "XMM%u", 6, 15))>;
def CSR_Win64_SwiftError : CalleeSavedRegs<(sub CSR_Win64, R12)>;
def CSR_Win64_SwiftTail : CalleeSavedRegs<(sub CSR_Win64, R13, R14)>;
// The function used by Darwin to obtain the address of a thread-local variable
// uses rdi to pass a single parameter and rax for the return value. All other
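A sketch of the R14 choice above (hypothetical names): because the async
context lives in a register that ordinary functions treat as callee-saved, it
survives calls to normal code without any spill.

declare swifttailcc void @resume(i8* swiftasync)

define swifttailcc void @forward(i8* swiftasync %ctx) {
  ; On x86-64 %ctx arrives in r14; a normal callee would preserve r14, so
  ; forwarding the context costs nothing.
  musttail call swifttailcc void @resume(i8* swiftasync %ctx)
  ret void
}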


@ -1183,6 +1183,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
CC != CallingConv::Tail &&
CC != CallingConv::SwiftTail &&
CC != CallingConv::X86_FastCall &&
CC != CallingConv::X86_StdCall &&
CC != CallingConv::X86_ThisCall &&
@ -1197,7 +1198,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
CC == CallingConv::Tail)
CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
return false;
// Let SDISel handle vararg functions.
@ -1285,7 +1286,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
CC != CallingConv::SwiftTail) {
Register Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@ -3142,7 +3144,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
if (Subtarget->getTargetTriple().isOSMSVCRT())
return 0;
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE || CC == CallingConv::Tail)
CC == CallingConv::HiPE || CC == CallingConv::Tail ||
CC == CallingConv::SwiftTail)
return 0;
if (CB)
@ -3194,6 +3197,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CallingConv::Tail:
case CallingConv::WebKit_JS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:
@ -3210,7 +3214,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
CC == CallingConv::Tail)
CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
return false;
// Don't know how to handle Win64 varargs yet. Nothing special needed for


@ -3184,7 +3184,8 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
CC == CallingConv::HHVM || CC == CallingConv::Tail);
CC == CallingConv::HHVM || CC == CallingConv::Tail ||
CC == CallingConv::SwiftTail);
}
/// Return true if we might ever do TCO for calls with this calling convention.
@ -3210,7 +3211,8 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
@ -3747,7 +3749,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
// Swift calling convention does not require we copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
if (CallConv == CallingConv::Swift)
if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
continue;
// All x86 ABIs require that for returning structs by value we copy the
@ -3912,7 +3914,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail;
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
@ -4622,7 +4624,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
CalleeCC == CallingConv::Tail;
CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
@ -26836,6 +26838,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
case CallingConv::Tail:
case CallingConv::SwiftTail:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;


@ -354,6 +354,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (!HasSSE)
return CSR_Win64_NoSSE_SaveList;
return CSR_Win64_SaveList;
case CallingConv::SwiftTail:
if (!Is64Bit)
return CSR_32_SaveList;
return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
case CallingConv::X86_64_SysV:
if (CallsEHReturn)
return CSR_64EHRet_SaveList;
@ -470,6 +474,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
break;
case CallingConv::Win64:
return CSR_Win64_RegMask;
case CallingConv::SwiftTail:
if (!Is64Bit)
return CSR_32_RegMask;
return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
case CallingConv::X86_64_SysV:
return CSR_64_RegMask;
case CallingConv::X86_INTR:
@ -502,6 +510,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
if (IsSwiftCC)
return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
}


@ -889,6 +889,7 @@ public:
case CallingConv::Fast:
case CallingConv::Tail:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:


@ -382,6 +382,8 @@ declare preserve_mostcc void @f.preserve_mostcc()
; CHECK: declare preserve_mostcc void @f.preserve_mostcc()
declare preserve_allcc void @f.preserve_allcc()
; CHECK: declare preserve_allcc void @f.preserve_allcc()
declare swifttailcc void @f.swifttailcc()
; CHECK: declare swifttailcc void @f.swifttailcc()
declare cc64 void @f.cc64()
; CHECK: declare x86_stdcallcc void @f.cc64()
declare x86_stdcallcc void @f.x86_stdcallcc()


@ -5,7 +5,7 @@
; Important details in prologue:
; * x22 is stored just below x29
; * Enough stack space is allocated for everything
define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
define swifttailcc void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
; CHECK-LABEL: simple:
; CHECK: orr x29, x29, #0x100000000000000
; CHECK: sub sp, sp, #32
@ -32,21 +32,20 @@ define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
ret void
}
define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
define swifttailcc void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
; CHECK-LABEL: more_csrs:
; CHECK: orr x29, x29, #0x100000000000000
; CHECK: sub sp, sp, #48
; CHECK: stp x24, x23, [sp, #8]
; CHECK: stp x29, x30, [sp, #32]
; CHECK: str x23, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK-NOAUTH: str x22, [sp, #24]
; CHECK-AUTH: add x16, sp, #24
; CHECK-NOAUTH: str x22, [sp, #8]
; CHECK-AUTH: add x16, sp, #8
; CHECK-AUTH: movk x16, #49946, lsl #48
; CHECK-AUTH: mov x17, x22
; CHECK-AUTH: pacdb x17, x16
; CHECK-AUTH: str x17, [sp, #24]
; CHECK-AUTH: str x17, [sp, #8]
; CHECK: add x29, sp, #32
; CHECK: add x29, sp, #16
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
@ -54,15 +53,14 @@ define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
; [...]
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x24, x23, [sp, #8]
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldr x23, [sp], #32
; CHECK: and x29, x29, #0xefffffffffffffff
; CHECK: add sp, sp, #48
call void asm sideeffect "", "~{x23}"()
ret void
}
define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
define swifttailcc void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
; CHECK-LABEL: locals:
; CHECK: orr x29, x29, #0x100000000000000
; CHECK: sub sp, sp, #64
@ -93,7 +91,7 @@ define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
ret void
}
define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" {
define swifttailcc void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" {
; CHECK-LABEL: use_input_context:
; CHECK-NOAUTH: str x22, [sp
@ -106,7 +104,7 @@ define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="
ret void
}
define i8** @context_in_func() "frame-pointer"="non-leaf" {
define swifttailcc i8** @context_in_func() "frame-pointer"="non-leaf" {
; CHECK-LABEL: context_in_func:
; CHECK-NOAUTH: str xzr, [sp, #8]
@ -120,7 +118,7 @@ define i8** @context_in_func() "frame-pointer"="non-leaf" {
ret i8** %ptr
}
define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" {
define swifttailcc void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" {
; CHECK-LABEL: write_frame_context:
; CHECK: sub x[[ADDR:[0-9]+]], x29, #8
; CHECK: str x0, [x[[ADDR]]]
@ -129,29 +127,48 @@ define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointe
ret void
}
define void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" {
define swifttailcc void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" {
; CHECK-LABEL: simple_fp_elim:
; CHECK-NOT: orr x29, x29, #0x100000000000000
ret void
}
define void @large_frame(i8* swiftasync %ctx) "frame-pointer"="all" {
define swifttailcc void @large_frame(i8* swiftasync %ctx) "frame-pointer"="all" {
; CHECK-LABEL: large_frame:
; CHECK: sub sp, sp, #48
; CHECK: stp x28, x27, [sp, #8]
; CHECK: stp x29, x30, [sp, #32]
; CHECK-NOAUTH: str x22, [sp, #24]
; CHECK: add x29, sp, #32
; CHECK: str x28, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK-NOAUTH: str x22, [sp, #8]
; CHECK: add x29, sp, #16
; CHECK: sub sp, sp, #1024
; [...]
; CHECK: add sp, sp, #1024
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x28, x27, [sp, #8]
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldr x28, [sp], #32
; CHECK: ret
%var = alloca i8, i32 1024
ret void
}
declare void @bar(i32*)
; Important point is that there is just one 8-byte gap in the CSR region (right
; now just above d8) to realign the stack.
define swifttailcc void @two_unpaired_csrs(i8* swiftasync) "frame-pointer"="all" {
; CHECK-LABEL: two_unpaired_csrs:
; CHECK: str d8, [sp, #-48]!
; CHECK: str x19, [sp, #16]
; CHECK: stp x29, x30, [sp, #32]
; CHECK-NOAUTH: str x22, [sp, #24]
; CHECK: add x29, sp, #32
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -32
; CHECK: .cfi_offset b8, -48
call void asm "","~{x19},~{d8}"()
call swifttailcc void @bar(i32* undef)
ret void
}
declare swifttailcc void @bar(i32*)
declare i8** @llvm.swift.async.context.addr()


@ -0,0 +1,18 @@
; RUN: llc -mtriple=arm64-apple-ios %s -o - | FileCheck %s
declare swifttailcc void @swifttail_callee()
define swifttailcc void @swifttail() {
; CHECK-LABEL: swifttail:
; CHECK-NOT: ld{{.*}}x22
call void asm "","~{x22}"()
tail call swifttailcc void @swifttail_callee()
ret void
}
define swifttailcc void @no_preserve_swiftself() {
; CHECK-LABEL: no_preserve_swiftself:
; CHECK-NOT: ld{{.*}}x20
call void asm "","~{x20}"()
ret void
}


@ -0,0 +1,230 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON
; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON
declare swifttailcc void @callee_stack0()
declare swifttailcc void @callee_stack8([8 x i64], i64)
declare swifttailcc void @callee_stack16([8 x i64], i64, i64)
declare extern_weak swifttailcc void @callee_weak()
define swifttailcc void @caller_to0_from0() nounwind {
; COMMON-LABEL: caller_to0_from0:
; COMMON-NEXT: // %bb.
tail call swifttailcc void @callee_stack0()
ret void
; COMMON-NEXT: b callee_stack0
}
define swifttailcc void @caller_to0_from8([8 x i64], i64) {
; COMMON-LABEL: caller_to0_from8:
tail call swifttailcc void @callee_stack0()
ret void
; COMMON: add sp, sp, #16
; COMMON-NEXT: b callee_stack0
}
define swifttailcc void @caller_to8_from0() {
; COMMON-LABEL: caller_to8_from0:
; Key point is that the "42" should go #16 below incoming stack
; pointer (we didn't have arg space to reuse).
tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #-16]!
; COMMON-NEXT: b callee_stack8
}
define swifttailcc void @caller_to8_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to8_from8:
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at SP on entry.
tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack8
}
define swifttailcc void @caller_to16_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to16_from8:
; COMMON-NOT: sub sp,
; Important point is that the call reuses the "dead" argument space
; above %a on the stack. If it tries to go below incoming-SP then the
; callee will not deallocate the space, even in swifttailcc.
tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
ret void
}
define swifttailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) {
; COMMON-LABEL: caller_to8_from24:
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at #16 above SP on entry.
tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #16]!
; COMMON-NEXT: b callee_stack8
}
define swifttailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) {
; COMMON-LABEL: caller_to16_from16:
; COMMON-NOT: sub sp,
; Here we want to make sure that both loads happen before the stores:
; otherwise either %a or %b will be wrongly clobbered.
tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
ret void
; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
}
define swifttailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
; COMMON-LABEL: disable_tail_calls:
; COMMON-NEXT: // %bb.
tail call swifttailcc void @callee_stack0()
ret void
; COMMON: bl callee_stack0
; COMMON: ret
}
; Weakly-referenced extern functions cannot be tail-called, as AAELF does
; not define the behaviour of branch instructions to undefined weak symbols.
define swifttailcc void @caller_weak() {
; COMMON-LABEL: caller_weak:
; COMMON: bl callee_weak
tail call void @callee_weak()
ret void
}
declare { [2 x float] } @get_vec2()
define { [3 x float] } @test_add_elem() {
; SDAG-LABEL: test_add_elem:
; SDAG: bl get_vec2
; SDAG: fmov s2, #1.0
; SDAG: ret
; GISEL-LABEL: test_add_elem:
; GISEL: str x30, [sp, #-16]!
; GISEL: bl get_vec2
; GISEL: fmov s2, #1.0
; GISEL: ldr x30, [sp], #16
; GISEL: ret
%call = tail call { [2 x float] } @get_vec2()
%arr = extractvalue { [2 x float] } %call, 0
%arr.0 = extractvalue [2 x float] %arr, 0
%arr.1 = extractvalue [2 x float] %arr, 1
%res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0
%res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1
%res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2
ret { [3 x float] } %res.012
}
declare double @get_double()
define { double, [2 x double] } @test_mismatched_insert() {
; COMMON-LABEL: test_mismatched_insert:
; COMMON: bl get_double
; COMMON: bl get_double
; COMMON: bl get_double
; COMMON: ret
%val0 = call double @get_double()
%val1 = call double @get_double()
%val2 = tail call double @get_double()
%res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0
%res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0
%res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1
ret { double, [2 x double] } %res.012
}
define void @fromC_totail() {
; COMMON-LABEL: fromC_totail:
; COMMON: sub sp, sp, #48
; COMMON-NOT: sub sp,
; COMMON: mov w[[TMP:[0-9]+]], #42
; COMMON: str x[[TMP]], [sp]
; COMMON: bl callee_stack8
; We must reset the stack to where it was before the call by undoing its extra stack pop.
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; COMMON: sub sp, sp, #16
call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
}
define void @fromC_totail_noreservedframe(i32 %len) {
; COMMON-LABEL: fromC_totail_noreservedframe:
; COMMON: stp x29, x30, [sp, #-48]!
; COMMON: mov w[[TMP:[0-9]+]], #42
; Note stack is subtracted here to allocate space for arg
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; And here.
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; But not restored here because callee_stack8 did that for us.
; COMMON-NOT: sub sp,
; Variable sized allocation prevents reserving frame at start of function so each call must allocate any stack space it needs.
%var = alloca i32, i32 %len
call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
}
declare void @Ccallee_stack8([8 x i64], i64)
define swifttailcc void @fromtail_toC() {
; COMMON-LABEL: fromtail_toC:
; COMMON: sub sp, sp, #32
; COMMON-NOT: sub sp,
; COMMON: mov w[[TMP:[0-9]+]], #42
; COMMON: str x[[TMP]], [sp]
; COMMON: bl Ccallee_stack8
; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything.
; COMMON-NOT: add sp,
; COMMON-NOT: sub sp,
; COMMON: str x[[TMP]], [sp]{{$}}
; COMMON: bl Ccallee_stack8
; COMMON-NOT: sub sp,
call void @Ccallee_stack8([8 x i64] undef, i64 42)
call void @Ccallee_stack8([8 x i64] undef, i64 42)
ret void
}
declare swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
define swiftcc i8* @CallSwiftSelf(i8* swiftself %closure, i8* %context) {
; CHECK-LABEL: CallSwiftSelf:
; CHECK: stp x20
;call void asm "","~{r13}"() ; We get a push r13 but why not with the call
; below?
%res = call swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
ret i8* %res
}


@ -28,39 +28,38 @@ define fastcc void @caller_to0_from8([8 x i64], i64) {
define fastcc void @caller_to8_from0() {
; COMMON-LABEL: caller_to8_from0:
; COMMON: sub sp, sp, #32
; Key point is that the "42" should go #16 below incoming stack
; pointer (we didn't have arg space to reuse).
tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #16]!
; COMMON: str {{x[0-9]+}}, [sp, #-16]!
; COMMON-NEXT: b callee_stack8
}
define fastcc void @caller_to8_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to8_from8:
; COMMON: sub sp, sp, #16
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at SP on entry.
tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #16]!
; COMMON: str {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack8
}
define fastcc void @caller_to16_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to16_from8:
; COMMON: sub sp, sp, #16
; COMMON-NOT: sub sp,
; Important point is that the call reuses the "dead" argument space
; above %a on the stack. If it tries to go below incoming-SP then the
; callee will not deallocate the space, even in fastcc.
tail call fastcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]!
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
ret void
}
@ -68,28 +67,28 @@ define fastcc void @caller_to16_from8([8 x i64], i64 %a) {
define fastcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) {
; COMMON-LABEL: caller_to8_from24:
; COMMON: sub sp, sp, #16
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at #16 above SP on entry.
tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #32]!
; COMMON: str {{x[0-9]+}}, [sp, #16]!
; COMMON-NEXT: b callee_stack8
}
define fastcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) {
; COMMON-LABEL: caller_to16_from16:
; COMMON: sub sp, sp, #16
; COMMON-NOT: sub sp,
; Here we want to make sure that both loads happen before the stores:
; otherwise either %a or %b will be wrongly clobbered.
tail call fastcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
ret void
; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]!
; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
}


@ -0,0 +1,225 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON
; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON
declare tailcc void @callee_stack0()
declare tailcc void @callee_stack8([8 x i64], i64)
declare tailcc void @callee_stack16([8 x i64], i64, i64)
declare extern_weak tailcc void @callee_weak()
define tailcc void @caller_to0_from0() nounwind {
; COMMON-LABEL: caller_to0_from0:
; COMMON-NEXT: // %bb.
tail call tailcc void @callee_stack0()
ret void
; COMMON-NEXT: b callee_stack0
}
define tailcc void @caller_to0_from8([8 x i64], i64) {
; COMMON-LABEL: caller_to0_from8:
tail call tailcc void @callee_stack0()
ret void
; COMMON: add sp, sp, #16
; COMMON-NEXT: b callee_stack0
}
define tailcc void @caller_to8_from0() "frame-pointer"="all" {
; COMMON-LABEL: caller_to8_from0:
; Key point is that the "42" should go #16 below incoming stack
; pointer (we didn't have arg space to reuse).
tail call tailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [x29, #16]
; COMMON: ldp x29, x30, [sp], #16
; If there is a sub here then the 42 will be briefly exposed to corruption
; from an interrupt if the kernel does not honour a red-zone, and a larger
; call could well overflow the red zone even if it is present.
; COMMON-NOT: sub sp,
; COMMON-NEXT: b callee_stack8
}
define tailcc void @caller_to8_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to8_from8:
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at SP on entry.
tail call tailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack8
}
define tailcc void @caller_to16_from8([8 x i64], i64 %a) {
; COMMON-LABEL: caller_to16_from8:
; COMMON-NOT: sub sp,
; Important point is that the call reuses the "dead" argument space
; above %a on the stack. If it tries to go below incoming-SP then the
; callee will not deallocate the space, even in tailcc.
tail call tailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
ret void
}
define tailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) {
; COMMON-LABEL: caller_to8_from24:
; COMMON-NOT: sub sp,
; Key point is that the "%a" should go where at #16 above SP on entry.
tail call tailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
; COMMON: str {{x[0-9]+}}, [sp, #16]!
; COMMON-NEXT: b callee_stack8
}
define tailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) {
; COMMON-LABEL: caller_to16_from16:
; COMMON-NOT: sub sp,
; Here we want to make sure that both loads happen before the stores:
; otherwise either %a or %b will be wrongly clobbered.
tail call tailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
ret void
; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; COMMON-NEXT: b callee_stack16
}
define tailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
; COMMON-LABEL: disable_tail_calls:
; COMMON-NEXT: // %bb.
tail call tailcc void @callee_stack0()
ret void
; COMMON: bl callee_stack0
; COMMON: ret
}
; Weakly-referenced extern functions cannot be tail-called, as AAELF does
; not define the behaviour of branch instructions to undefined weak symbols.
define tailcc void @caller_weak() {
; COMMON-LABEL: caller_weak:
; COMMON: bl callee_weak
tail call void @callee_weak()
ret void
}
declare { [2 x float] } @get_vec2()
define { [3 x float] } @test_add_elem() {
; SDAG-LABEL: test_add_elem:
; SDAG: bl get_vec2
; SDAG: fmov s2, #1.0
; SDAG: ret
; GISEL-LABEL: test_add_elem:
; GISEL: str x30, [sp, #-16]!
; GISEL: bl get_vec2
; GISEL: fmov s2, #1.0
; GISEL: ldr x30, [sp], #16
; GISEL: ret
%call = tail call { [2 x float] } @get_vec2()
%arr = extractvalue { [2 x float] } %call, 0
%arr.0 = extractvalue [2 x float] %arr, 0
%arr.1 = extractvalue [2 x float] %arr, 1
%res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0
%res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1
%res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2
ret { [3 x float] } %res.012
}
declare double @get_double()
define { double, [2 x double] } @test_mismatched_insert() {
; COMMON-LABEL: test_mismatched_insert:
; COMMON: bl get_double
; COMMON: bl get_double
; COMMON: bl get_double
; COMMON: ret
%val0 = call double @get_double()
%val1 = call double @get_double()
%val2 = tail call double @get_double()
%res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0
%res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0
%res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1
ret { double, [2 x double] } %res.012
}
define void @fromC_totail() {
; COMMON-LABEL: fromC_totail:
; COMMON: sub sp, sp, #32
; COMMON-NOT: sub sp,
; COMMON: mov w[[TMP:[0-9]+]], #42
; COMMON: str x[[TMP]], [sp]
; COMMON: bl callee_stack8
; We must reset the stack to where it was before the call by undoing its extra stack pop.
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; COMMON: sub sp, sp, #16
call tailcc void @callee_stack8([8 x i64] undef, i64 42)
call tailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
}
define void @fromC_totail_noreservedframe(i32 %len) {
; COMMON-LABEL: fromC_totail_noreservedframe:
; COMMON: stp x29, x30, [sp, #-32]!
; COMMON: mov w[[TMP:[0-9]+]], #42
; Note stack is subtracted here to allocate space for arg
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; And here.
; COMMON: str x[[TMP]], [sp, #-16]!
; COMMON: bl callee_stack8
; But not restored here because callee_stack8 did that for us.
; COMMON-NOT: sub sp,
; Variable sized allocation prevents reserving frame at start of function so each call must allocate any stack space it needs.
%var = alloca i32, i32 %len
call tailcc void @callee_stack8([8 x i64] undef, i64 42)
call tailcc void @callee_stack8([8 x i64] undef, i64 42)
ret void
}
declare void @Ccallee_stack8([8 x i64], i64)
define tailcc void @fromtail_toC() {
; COMMON-LABEL: fromtail_toC:
; COMMON: sub sp, sp, #32
; COMMON-NOT: sub sp,
; COMMON: mov w[[TMP:[0-9]+]], #42
; COMMON: str x[[TMP]], [sp]
; COMMON: bl Ccallee_stack8
; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything.
; COMMON-NOT: add sp,
; COMMON-NOT: sub sp,
; COMMON: str x[[TMP]], [sp]{{$}}
; COMMON: bl Ccallee_stack8
; COMMON-NOT: sub sp,
call void @Ccallee_stack8([8 x i64] undef, i64 42)
call void @Ccallee_stack8([8 x i64] undef, i64 42)
ret void
}


@ -0,0 +1,22 @@
; RUN: llc -mtriple=i386-apple-darwin %s -o - | FileCheck %s
declare void @clobber()
declare swifttailcc void @swifttail_callee()
define swifttailcc void @swifttail() {
; CHECK-LABEL: swifttail:
; CHECK-NOT: %rbx
call void @clobber()
tail call swifttailcc void @swifttail_callee()
ret void
}
declare swifttailcc void @swiftself(i8* swiftself)
define swifttailcc void @swifttail2(i8* %arg) {
; CHECK-LABEL: swifttail2:
; CHECK: movl {{.*}}, %ecx
; CHECK: jmp _swiftself
tail call swifttailcc void @swiftself(i8* swiftself %arg)
ret void
}


@ -0,0 +1,28 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
declare swifttailcc void @swifttail_callee()
define swifttailcc void @swifttail() {
; CHECK-LABEL: swifttail:
; CHECK-NOT: popq %r14
call void asm "","~{r14}"()
tail call swifttailcc void @swifttail_callee()
ret void
}
define swifttailcc void @no_preserve_swiftself() {
; CHECK-LABEL: no_preserve_swiftself:
; CHECK-NOT: popq %r13
call void asm "","~{r13}"()
ret void
}
declare swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
define swiftcc i8* @CallSwiftSelf(i8* swiftself %closure, i8* %context) {
; CHECK-LABEL: CallSwiftSelf:
; CHECK: pushq %r13
;call void asm "","~{r13}"() ; We get a push r13 but why not with the call
; below?
%res = call swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
ret i8* %res
}


@ -0,0 +1,29 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown -O0 | FileCheck %s
define swifttailcc [4 x i64] @return_int() {
; CHECK-LABEL: return_int:
; CHECK-DAG: movl $1, %eax
; CHECK-DAG: movl $2, %edx
; CHECK-DAG: movl $3, %ecx
; CHECK-DAG: movl $4, %r8d
ret [4 x i64] [i64 1, i64 2, i64 3, i64 4]
}
; CHECK: [[ONE:.LCPI.*]]:
; CHECK-NEXT: # double 1
; CHECK: [[TWO:.LCPI.*]]:
; CHECK-NEXT: # double 2
; CHECK: [[THREE:.LCPI.*]]:
; CHECK-NEXT: # double 3
define swifttailcc [4 x double] @return_float() {
; CHECK-LABEL: return_float:
; CHECK-DAG: movsd [[ONE]](%rip), %xmm1
; CHECK-DAG: movsd [[TWO]](%rip), %xmm2
; CHECK-DAG: movsd [[THREE]](%rip), %xmm3
; CHECK-DAG: xorps %xmm0, %xmm0
ret [4 x double] [double 0.0, double 1.0, double 2.0, double 3.0]
}


@ -0,0 +1,65 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
declare dso_local swifttailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
define dso_local swifttailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
; CHECK-LABEL: tailcaller:
; CHECK-NOT: subq
; CHECK-NOT: addq
; CHECK: jmp tailcallee
entry:
%tmp11 = tail call swifttailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
ret i32 %tmp11
}
declare dso_local swifttailcc i8* @alias_callee()
define swifttailcc noalias i8* @noalias_caller() nounwind {
; CHECK-LABEL: noalias_caller:
; CHECK: jmp alias_callee
%p = musttail call swifttailcc i8* @alias_callee()
ret i8* %p
}
declare dso_local swifttailcc noalias i8* @noalias_callee()
define dso_local swifttailcc i8* @alias_caller() nounwind {
; CHECK-LABEL: alias_caller:
; CHECK: jmp noalias_callee # TAILCALL
%p = tail call swifttailcc noalias i8* @noalias_callee()
ret i8* %p
}
declare dso_local swifttailcc i32 @i32_callee()
define dso_local swifttailcc i32 @ret_undef() nounwind {
; CHECK-LABEL: ret_undef:
; CHECK: jmp i32_callee # TAILCALL
%p = tail call swifttailcc i32 @i32_callee()
ret i32 undef
}
declare dso_local swifttailcc void @does_not_return()
define dso_local swifttailcc i32 @noret() nounwind {
; CHECK-LABEL: noret:
; CHECK: jmp does_not_return
tail call swifttailcc void @does_not_return()
unreachable
}
define dso_local swifttailcc void @void_test(i32, i32, i32, i32) {
; CHECK-LABEL: void_test:
; CHECK: jmp void_test
entry:
tail call swifttailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
ret void
}
define dso_local swifttailcc i1 @i1test(i32, i32, i32, i32) {
; CHECK-LABEL: i1test:
; CHECK: jmp i1test
entry:
%4 = tail call swifttailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
ret i1 %4
}


@ -57,7 +57,7 @@
;; Calling conventions
"ccc" "fastcc" "coldcc" "webkit_jscc" "anyregcc" "preserve_mostcc" "preserve_allcc"
"cxx_fast_tlscc" "swiftcc" "tailcc" "cfguard_checkcc"
"cxx_fast_tlscc" "swiftcc" "tailcc" "swifttailcc" "cfguard_checkcc"
;; Visibility styles
"default" "hidden" "protected"
;; DLL storages


@ -176,6 +176,7 @@ syn keyword llvmKeyword
\ strictfp
\ swiftcc
\ swifterror
\ swifttailcc
\ swiftself
\ syncscope
\ tail