diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index 4e4a34432b4..b789d148140 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -795,6 +795,8 @@ function. The operand fields are:
   * ``swiftcc`` : code 16
   * ``cxx_fast_tlscc``: code 17
   * ``tailcc`` : code 18
+  * ``cfguard_checkcc`` : code 19
+  * ``swifttailcc`` : code 20
   * ``x86_stdcallcc``: code 64
   * ``x86_fastcallcc``: code 65
   * ``arm_apcscc``: code 66
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 7644242d2d6..6c92abedb37 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -2064,11 +2064,12 @@ Tail call optimization
 ----------------------
 
 Tail call optimization, callee reusing the stack of the caller, is currently
-supported on x86/x86-64, PowerPC, and WebAssembly. It is performed on x86/x86-64
-and PowerPC if:
+supported on x86/x86-64, PowerPC, AArch64, and WebAssembly. It is performed on
+x86/x86-64, PowerPC, and AArch64 if:
 
 * Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
-  calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``.
+  calling convention), ``cc 11`` (HiPE calling convention), ``tailcc``, or
+  ``swifttailcc``.
 
 * The call is a tail call - in tail position (ret immediately follows call and
   ret uses value of call or is void).
@@ -2102,6 +2103,10 @@ WebAssembly constraints:
 * The caller and callee's return types must match. The caller cannot be void
   unless the callee is, too.
 
+AArch64 constraints:
+
+* No variable argument lists are used.
+
 Example:
 
 Call as ``llc -tailcallopt test.ll``.
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index dfdf0c191ad..68484104f99 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -432,10 +432,6 @@ added in the future:
     - On X86-64 the callee preserves all general purpose registers, except for
       RDI and RAX.
-"``swiftcc``" - This calling convention is used for Swift language.
-    - On X86-64 RCX and R8 are available for additional integer returns, and
-      XMM2 and XMM3 are available for additional FP/vector returns.
-    - On iOS platforms, we use AAPCS-VFP calling convention.
 "``tailcc``" - Tail callable calling convention
     This calling convention ensures that calls in tail position will always be
     tail call optimized. This calling convention is equivalent to fastcc,
@@ -444,6 +440,14 @@ added in the future:
     the GHC or the HiPE convention is used. `_ This calling convention does not
     support varargs and requires the prototype of all callees to exactly match
     the prototype of the function definition.
+"``swiftcc``" - This calling convention is used for Swift language.
+    - On X86-64 RCX and R8 are available for additional integer returns, and
+      XMM2 and XMM3 are available for additional FP/vector returns.
+    - On iOS platforms, we use AAPCS-VFP calling convention.
+"``swifttailcc``"
+    This calling convention is like ``swiftcc`` in most respects, but also the
+    callee pops the argument area of the stack so that mandatory tail calls are
+    possible as in ``tailcc``.
 "``cfguard_checkcc``" - Windows Control Flow Guard (Check mechanism)
    This calling convention is used for the Control Flow Guard check function,
    calls to which can be inserted before indirect calls to check that the call
@@ -12386,7 +12390,7 @@ context of a Swift execution.
 Semantics:
 """"""""""
 
-If the function has a ``swiftasync`` parameter, that argument will initially
+If the caller has a ``swiftasync`` parameter, that argument will initially
 be stored at the returned address. If not, it will be initialized to null.
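For illustration of the new convention described above (a minimal sketch only, with made-up function names, not part of the patch itself), a ``swifttailcc`` caller passing the async context can rely on the callee popping its own stack argument area, so a call in tail position is always lowered as a branch:

.. code-block:: llvm

    declare swifttailcc void @step(i8* swiftasync, i64)

    define swifttailcc void @start(i8* swiftasync %ctx, i64 %n) {
      ; The callee cleans up its own argument area, so this tail call is
      ; guaranteed to be emitted as a jump even when arguments go on the stack.
      tail call swifttailcc void @step(i8* swiftasync %ctx, i64 %n)
      ret void
    }

On AArch64 and x86-64 this is expected to lower to a plain ``b``/``jmp`` to ``@step``, with no ``-tailcallopt`` flag required.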
'``llvm.localescape``' and '``llvm.localrecover``' Intrinsics diff --git a/include/llvm/AsmParser/LLToken.h b/include/llvm/AsmParser/LLToken.h index e068d9079f9..f261f4af908 100644 --- a/include/llvm/AsmParser/LLToken.h +++ b/include/llvm/AsmParser/LLToken.h @@ -156,6 +156,7 @@ enum Kind { kw_webkit_jscc, kw_anyregcc, kw_swiftcc, + kw_swifttailcc, kw_preserve_mostcc, kw_preserve_allcc, kw_ghccc, diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index ec7d18129a1..fd285424652 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -86,6 +86,11 @@ namespace CallingConv { /// and has no return value. All register values are preserved. CFGuard_Check = 19, + /// SwiftTail - This follows the Swift calling convention in how arguments + /// are passed but guarantees tail calls will be made by making the callee + /// clean up their stack. + SwiftTail = 20, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 4374982bef8..00e2fef80e9 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -609,6 +609,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_regcallcc); KEYWORD(webkit_jscc); KEYWORD(swiftcc); + KEYWORD(swifttailcc); KEYWORD(anyregcc); KEYWORD(preserve_mostcc); KEYWORD(preserve_allcc); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index f4546b2662a..ca3efa8e662 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2124,6 +2124,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'preserve_allcc' /// ::= 'ghccc' /// ::= 'swiftcc' +/// ::= 'swifttailcc' /// ::= 'x86_intrcc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' @@ -2174,6 +2175,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_swiftcc: CC = CallingConv::Swift; break; + case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break; case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index f3e46bc0e94..e5d576d879b 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -511,9 +511,10 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) { // not profitable. Also, if the callee is a special function (e.g. // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. 
- if (!Ret && - ((!TM.Options.GuaranteedTailCallOpt && - Call.getCallingConv() != CallingConv::Tail) || !isa(Term))) + if (!Ret && ((!TM.Options.GuaranteedTailCallOpt && + Call.getCallingConv() != CallingConv::Tail && + Call.getCallingConv() != CallingConv::SwiftTail) || + !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 57581abdd2e..9e2a462334b 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -388,6 +388,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::SPIR_FUNC: Out << "spir_func"; break; case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; + case CallingConv::SwiftTail: Out << "swifttailcc"; break; case CallingConv::X86_INTR: Out << "x86_intrcc"; break; case CallingConv::HHVM: Out << "hhvmcc"; break; case CallingConv::HHVM_C: Out << "hhvm_ccc"; break; diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 0cfddb55a06..4b7ce565eb1 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -421,6 +421,9 @@ def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23), X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, LR, FP)>; +def CSR_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>; + // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; @@ -473,6 +476,9 @@ def CSR_Darwin_AArch64_AAPCS_ThisReturn def CSR_Darwin_AArch64_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; +def CSR_Darwin_AArch64_AAPCS_SwiftTail + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>; + // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the // fast path for calculation, but other registers except X0 (argument/return) diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index d3b638b5a2b..64ad462da9e 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -193,9 +193,14 @@ cl::opt EnableHomogeneousPrologEpilog( STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -/// Returns the argument pop size. -static uint64_t getArgumentPopSize(MachineFunction &MF, - MachineBasicBlock &MBB) { +/// Returns how much of the incoming argument stack area (in bytes) we should +/// clean up in an epilogue. For the C calling convention this will be 0, for +/// guaranteed tail call conventions it can be positive (a normal return or a +/// tail call to a function that uses less stack space for arguments) or +/// negative (for a tail call to a function that needs more stack space than us +/// for arguments). 
+static int64_t getArgumentStackToRestore(MachineFunction &MF, + MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); bool IsTailCallReturn = false; if (MBB.end() != MBBI) { @@ -206,7 +211,7 @@ static uint64_t getArgumentPopSize(MachineFunction &MF, } AArch64FunctionInfo *AFI = MF.getInfo(); - uint64_t ArgumentPopSize = 0; + int64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); @@ -255,7 +260,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)) return false; - if (Exit && getArgumentPopSize(MF, *Exit)) + if (Exit && getArgumentStackToRestore(MF, *Exit)) return false; return true; @@ -311,10 +316,10 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, const AArch64FunctionInfo *AFI, bool IsWin64, bool IsFunclet) { if (!IsWin64 || IsFunclet) { - // Only Win64 uses fixed objects, and then only for the function (not - // funclets) - return 0; + return AFI->getTailCallReservedStack(); } else { + if (AFI->getTailCallReservedStack() != 0) + report_fatal_error("cannot generate ABI-changing tail call for Win64"); // Var args are stored here in the primary function. const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); // To support EH funclets we allocate an UnwindHelp object @@ -887,21 +892,17 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( ++MBBI; } unsigned NewOpc; - int Scale = 1; switch (MBBI->getOpcode()) { default: llvm_unreachable("Unexpected callee-save save/restore opcode!"); case AArch64::STPXi: NewOpc = AArch64::STPXpre; - Scale = 8; break; case AArch64::STPDi: NewOpc = AArch64::STPDpre; - Scale = 8; break; case AArch64::STPQi: NewOpc = AArch64::STPQpre; - Scale = 16; break; case AArch64::STRXui: NewOpc = AArch64::STRXpre; @@ -914,15 +915,12 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( break; case AArch64::LDPXi: NewOpc = AArch64::LDPXpost; - Scale = 8; break; case AArch64::LDPDi: NewOpc = AArch64::LDPDpost; - Scale = 8; break; case AArch64::LDPQi: NewOpc = AArch64::LDPQpost; - Scale = 16; break; case AArch64::LDRXui: NewOpc = AArch64::LDRXpost; @@ -941,9 +939,18 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( SEH->eraseFromParent(); } + TypeSize Scale = TypeSize::Fixed(1); + unsigned Width; + int64_t MinOffset, MaxOffset; + bool Success = static_cast(TII)->getMemOpInfo( + NewOpc, Scale, Width, MinOffset, MaxOffset); + (void)Success; + assert(Success && "unknown load/store opcode"); + // If the first store isn't right where we want SP then we can't fold the // update in so create a normal arithmetic instruction instead. - if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0) { + if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 || + CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(CSStackSizeInc), TII, InProlog ? 
MachineInstr::FrameSetup @@ -966,7 +973,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && "Unexpected base register in callee-save save/restore instruction!"); assert(CSStackSizeInc % Scale == 0); - MIB.addImm(CSStackSizeInc / Scale); + MIB.addImm(CSStackSizeInc / (int)Scale); MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands()); @@ -1667,9 +1674,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consistency with the prologue. Note that - // in the epilogue, the residual adjustment is executed first. - uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB); + // How much of the stack used by incoming arguments this function is expected + // to restore in this particular epilogue. + int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB); // The stack frame should be like below, // @@ -1704,7 +1711,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); - uint64_t AfterCSRPopSize = ArgumentPopSize; + int64_t AfterCSRPopSize = ArgumentStackToRestore; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // We cannot rely on the local stack size set in emitPrologue if the function // has funclets, as funclets have different local stack size requirements, and @@ -1741,8 +1748,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); - // If the offset is 0, convert it to a post-index ldp. - if (OffsetOp.getImm() == 0) + // If the offset is 0 and the AfterCSR pop is not actually trying to + // allocate more stack for arguments (in space that an untimely interrupt + // may clobber), convert it to a post-index ldp. + if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) convertCalleeSaveRestoreToSPPrePostIncDec( MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false); else { @@ -1913,6 +1922,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. if (AfterCSRPopSize) { + assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an " + "interrupt may have clobbered"); // Find an insertion point for the first ldp so that it goes before the // shadow call stack epilog instruction. This ensures that the restore of // lr from x18 is placed after the restore from sp. 
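The signed ``getArgumentStackToRestore`` and the ``AfterCSRPopSize >= 0`` guard above exist because a guaranteed tail call may need more argument stack than the caller itself received. A rough IR sketch of that situation (hypothetical names, modelled on the new ``caller_to8_from0`` test):

.. code-block:: llvm

    declare swifttailcc void @callee_stack8([8 x i64], i64)

    define swifttailcc void @caller_stack0() {
      ; The callee takes one stack argument while this function received none,
      ; so the epilogue must hand over extra space below the incoming SP rather
      ; than popping anything; the restore amount is effectively negative.
      tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
      ret void
    }

Folding that adjustment into a post-index ``ldp`` would briefly expose the stored argument to clobbering by an interrupt, which is why the conversion is now skipped for negative amounts.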
@@ -1928,7 +1939,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, + StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } if (HasWinCFI) @@ -2146,7 +2157,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { AttributeList Attrs = MF.getFunction().getAttributes(); return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && - Attrs.hasAttrSomewhere(Attribute::SwiftError)); + Attrs.hasAttrSomewhere(Attribute::SwiftError)) && + MF.getFunction().getCallingConv() != CallingConv::SwiftTail; } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, @@ -2260,6 +2272,7 @@ static void computeCalleeSaveRegisterPairs( FirstReg = Count - 1; } int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); + bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace(); // When iterating backwards, the loop condition relies on unsigned wraparound. for (unsigned i = FirstReg; i < Count; i += RegInc) { @@ -2369,17 +2382,16 @@ static void computeCalleeSaveRegisterPairs( // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. - if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && + if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 && - !RPI.isPaired()) { + !RPI.isPaired() && ByteOffset % 16 != 0) { ByteOffset += 8 * StackFillDir; - assert(ByteOffset % 16 == 0); assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); // A stack frame with a gap looks like this, bottom up: // d9, d8. x21, gap, x20, x19. - // Set extra alignment on the x21 object (the only unpaired register) - // to create the gap above it. + // Set extra alignment on the x21 object to create the gap above it. MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); + NeedGapToAlignStack = false; } int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ab20e0dbcd6..c4f88398c71 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4764,6 +4764,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: if (Subtarget->isTargetWindows() && IsVarArg) return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) @@ -5240,8 +5242,9 @@ SDValue AArch64TargetLowering::LowerCallResult( } /// Return true if the calling convention is one that we can guarantee TCO for. -static bool canGuaranteeTCO(CallingConv::ID CC) { - return CC == CallingConv::Fast; +static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { + return (CC == CallingConv::Fast && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } /// Return true if we might ever do TCO for calls with this calling convention. 
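With the new signature, ``canGuaranteeTCO`` makes the ``-tailcallopt`` dependence explicit: ``fastcc`` only gets the guarantee under that flag, while ``tailcc`` and ``swifttailcc`` always do. A small, purely illustrative IR comparison (hypothetical functions):

.. code-block:: llvm

    declare fastcc void @f()
    declare swifttailcc void @g()

    define fastcc void @call_f() {
      ; Becomes a guaranteed tail call only when llc runs with -tailcallopt.
      tail call fastcc void @f()
      ret void
    }

    define swifttailcc void @call_g() {
      ; Guaranteed regardless of -tailcallopt; tailcc behaves the same way.
      tail call swifttailcc void @g()
      ret void
    }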
@@ -5251,9 +5254,12 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { case CallingConv::AArch64_SVE_VectorCall: case CallingConv::PreserveMost: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + case CallingConv::Fast: return true; default: - return canGuaranteeTCO(CC); + return false; } } @@ -5305,8 +5311,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( return false; } - if (getTargetMachine().Options.GuaranteedTailCallOpt) - return canGuaranteeTCO(CalleeCC) && CCMatch; + if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + return CCMatch; // Externally-defined functions with weak linkage should not be // tail-called on AArch64 when the OS does not support dynamic @@ -5437,7 +5443,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { - return CallCC == CallingConv::Fast && TailCallOpt; + return (CallCC == CallingConv::Fast && TailCallOpt) || + CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; } /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, @@ -5489,7 +5496,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // A sibling call is one where we're under the usual C ABI and not planning // to change that but can still do a tail call: - if (!TailCallOpt && IsTailCall) + if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail && + CallConv != CallingConv::SwiftTail) IsSibCall = true; if (IsTailCall) @@ -5580,6 +5588,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // can actually shrink the stack. FPDiff = NumReusableBytes - NumBytes; + // Update the required reserved area if this is the tail call requiring the + // most argument stack space. + if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) + FuncInfo->setTailCallReservedStack(-FPDiff); + // The stack pointer must be 16-byte aligned at all times it's used for a // memory operation, which in practice means at *all* times and in // particular across call boundaries. Therefore our own arguments started at @@ -5591,7 +5604,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); + Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy(DAG.getDataLayout())); @@ -5848,7 +5861,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // we've carefully laid out the parameters so that when sp is reset they'll be // in the correct location. 
if (IsTailCall && !IsSibCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index be01f1684ee..52b53bfa501 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2695,6 +2695,38 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = 0; MaxOffset = 4095; break; + case AArch64::STPXpre: + case AArch64::LDPXpost: + case AArch64::STPDpre: + case AArch64::LDPDpost: + Scale = TypeSize::Fixed(8); + Width = 8; + MinOffset = -512; + MaxOffset = 504; + break; + case AArch64::STPQpre: + case AArch64::LDPQpost: + Scale = TypeSize::Fixed(16); + Width = 16; + MinOffset = -1024; + MaxOffset = 1008; + break; + case AArch64::STRXpre: + case AArch64::STRDpre: + case AArch64::LDRXpost: + case AArch64::LDRDpost: + Scale = TypeSize::Fixed(1); + Width = 8; + MinOffset = -256; + MaxOffset = 255; + break; + case AArch64::STRQpre: + case AArch64::LDRQpost: + Scale = TypeSize::Fixed(1); + Width = 16; + MinOffset = -256; + MaxOffset = 255; + break; case AArch64::ADDG: Scale = TypeSize::Fixed(16); Width = 0; diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index f3a85f23617..e5e08e6c00d 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -54,6 +54,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// callee is expected to pop the args. unsigned ArgumentStackToRestore = 0; + /// Space just below incoming stack pointer reserved for arguments being + /// passed on the stack during a tail call. This will be the difference + /// between the largest tail call argument space needed in this function and + /// what's already available by reusing space of incoming arguments. + unsigned TailCallReservedStack = 0; + /// HasStackFrame - True if this function has a stack frame. Set by /// determineCalleeSaves(). 
bool HasStackFrame = false; @@ -180,6 +186,11 @@ public: ArgumentStackToRestore = bytes; } + unsigned getTailCallReservedStack() const { return TailCallReservedStack; } + void setTailCallReservedStack(unsigned bytes) { + TailCallReservedStack = bytes; + } + bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } void setStackSizeSVE(uint64_t S) { diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 9f1b791e18b..6a2fc6251a4 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -100,6 +100,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { MF->getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_AArch64_AAPCS_SwiftError_SaveList; + if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail) + return CSR_AArch64_AAPCS_SwiftTail_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::Win64) @@ -134,6 +136,8 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const { MF->getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList; + if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail) + return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_Darwin_AArch64_RT_MostRegs_SaveList; return CSR_Darwin_AArch64_AAPCS_SaveList; @@ -199,6 +203,8 @@ AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF, ->supportSwiftError() && MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask; + if (CC == CallingConv::SwiftTail) + return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask; if (CC == CallingConv::PreserveMost) return CSR_Darwin_AArch64_RT_MostRegs_RegMask; return CSR_Darwin_AArch64_AAPCS_RegMask; @@ -233,6 +239,11 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask : CSR_AArch64_AAPCS_SwiftError_RegMask; + if (CC == CallingConv::SwiftTail) { + if (SCS) + report_fatal_error("ShadowCallStack attribute not supported with swifttail"); + return CSR_AArch64_AAPCS_SwiftTail_RegMask; + } if (CC == CallingConv::PreserveMost) return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask : CSR_AArch64_RT_MostRegs_RegMask; diff --git a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 85f810ff5ff..f87b290dfbf 100644 --- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -332,7 +332,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { } // namespace static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { - return CallConv == CallingConv::Fast && TailCallOpt; + return (CallConv == CallingConv::Fast && TailCallOpt) || + CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; } bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -608,8 +609,9 @@ bool AArch64CallLowering::lowerFormalArguments( } /// Return true if the calling convention is one that we can guarantee TCO for. 
-static bool canGuaranteeTCO(CallingConv::ID CC) { - return CC == CallingConv::Fast; +static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { + return (CC == CallingConv::Fast && GuaranteeTailCalls) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } /// Return true if we might ever do TCO for calls with this calling convention. @@ -618,9 +620,12 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { case CallingConv::C: case CallingConv::PreserveMost: case CallingConv::Swift: + case CallingConv::SwiftTail: + case CallingConv::Tail: + case CallingConv::Fast: return true; default: - return canGuaranteeTCO(CC); + return false; } } @@ -812,8 +817,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization( } // If we have -tailcallopt, then we're done. - if (MF.getTarget().Options.GuaranteedTailCallOpt) - return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv(); + if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt)) + return CalleeCC == CallerF.getCallingConv(); // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). // Try to find cases where we can do that. @@ -884,7 +889,9 @@ bool AArch64CallLowering::lowerTailCall( AArch64FunctionInfo *FuncInfo = MF.getInfo(); // True when we're tail calling, but without -tailcallopt. - bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && + Info.CallConv != CallingConv::Tail && + Info.CallConv != CallingConv::SwiftTail; // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 // register class. Until we can do that, we should fall back here. @@ -956,6 +963,11 @@ bool AArch64CallLowering::lowerTailCall( // actually shrink the stack. FPDiff = NumReusableBytes - NumBytes; + // Update the required reserved area if this is the tail call requiring the + // most argument stack space. + if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) + FuncInfo->setTailCallReservedStack(-FPDiff); + // The stack pointer must be 16-byte aligned at all times it's used for a // memory operation, which in practice means at *all* times and in // particular across call boundaries. Therefore our own arguments started at @@ -1003,12 +1015,12 @@ bool AArch64CallLowering::lowerTailCall( // sequence start and end here. if (!IsSibCall) { MIB->getOperand(1).setImm(FPDiff); - CallSeqStart.addImm(NumBytes).addImm(0); + CallSeqStart.addImm(0).addImm(0); // End the call sequence *before* emitting the call. Normally, we would // tidy the frame up after the call. However, here, we've laid out the // parameters so that when SP is reset, they will be in the correct // location. - MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0); + MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0); } // Now we can add the actual call instruction to the correct basic block. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2552d446749..36ea80a5203 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1996,6 +1996,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: case CallingConv::Swift: + case CallingConv::SwiftTail: return isVarArg ? 
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 45b814134c6..c86d658653f 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -462,6 +462,7 @@ def RetCC_X86_64 : CallingConv<[ // Handle Swift calls. CCIfCC<"CallingConv::Swift", CCDelegateTo>, + CCIfCC<"CallingConv::SwiftTail", CCDelegateTo>, // Handle explicit CC selection CCIfCC<"CallingConv::Win64", CCDelegateTo>, @@ -517,9 +518,15 @@ def CC_X86_64_C : CallingConv<[ // A SwiftError is passed in R12. CCIfSwiftError>>, - // For Swift Calling Convention, pass sret in %rax. + // Pass SwiftAsync in an otherwise callee saved register so that calls to + // normal functions don't need to save it somewhere. + CCIfSwiftAsync>>, + + // For Swift Calling Conventions, pass sret in %rax. CCIfCC<"CallingConv::Swift", CCIfSRet>>>, + CCIfCC<"CallingConv::SwiftTail", + CCIfSRet>>>, // Pointers are always passed in full 64-bit registers. CCIfPtr>, @@ -851,6 +858,10 @@ def CC_X86_32_C : CallingConv<[ // The 'nest' parameter, if any, is passed in ECX. CCIfNest>, + // On swifttailcc pass swiftself in ECX. + CCIfCC<"CallingConv::SwiftTail", + CCIfSwiftSelf>>>, + // The first 3 integer arguments, if marked 'inreg' and if the call is not // a vararg call, are passed in integer registers. CCIfNotVarArg>>>, @@ -1080,6 +1091,7 @@ def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>; def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>; def CSR_64_SwiftError : CalleeSavedRegs<(sub CSR_64, R12)>; +def CSR_64_SwiftTail : CalleeSavedRegs<(sub CSR_64, R13, R14)>; def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>; def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; @@ -1090,6 +1102,7 @@ def CSR_Win64 : CalleeSavedRegs<(add CSR_Win64_NoSSE, (sequence "XMM%u", 6, 15))>; def CSR_Win64_SwiftError : CalleeSavedRegs<(sub CSR_Win64, R12)>; +def CSR_Win64_SwiftTail : CalleeSavedRegs<(sub CSR_Win64, R13, R14)>; // The function used by Darwin to obtain the address of a thread-local variable // uses rdi to pass a single parameter and rax for the return value. All other diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index bd08af81e67..cf8d5d6c5b7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1183,6 +1183,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::Tail && + CC != CallingConv::SwiftTail && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && @@ -1197,7 +1198,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || - CC == CallingConv::Tail) + CC == CallingConv::Tail || CC == CallingConv::SwiftTail) return false; // Let SDISel handle vararg functions. @@ -1285,7 +1286,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // the sret argument into %rax/%eax (depending on ABI) for the return. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. 
- if (F.hasStructRetAttr() && CC != CallingConv::Swift) { + if (F.hasStructRetAttr() && CC != CallingConv::Swift && + CC != CallingConv::SwiftTail) { Register Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); @@ -3142,7 +3144,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE || CC == CallingConv::Tail) + CC == CallingConv::HiPE || CC == CallingConv::Tail || + CC == CallingConv::SwiftTail) return 0; if (CB) @@ -3194,6 +3197,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: + case CallingConv::SwiftTail: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: @@ -3210,7 +3214,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || - CC == CallingConv::Tail) + CC == CallingConv::Tail || CC == CallingConv::SwiftTail) return false; // Don't know how to handle Win64 varargs yet. Nothing special needed for diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 33af959229c..66df93319b6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3184,7 +3184,8 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM || CC == CallingConv::Tail); + CC == CallingConv::HHVM || CC == CallingConv::Tail || + CC == CallingConv::SwiftTail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -3210,7 +3211,8 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { /// Return true if the function is being made into a tailcall target by /// changing its ABI. static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { - return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; + return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || + CC == CallingConv::Tail || CC == CallingConv::SwiftTail; } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { @@ -3747,7 +3749,7 @@ SDValue X86TargetLowering::LowerFormalArguments( for (unsigned I = 0, E = Ins.size(); I != E; ++I) { // Swift calling convention does not require we copy the sret argument // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 
- if (CallConv == CallingConv::Swift) + if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) continue; // All x86 ABIs require that for returning structs by value we copy the @@ -3912,7 +3914,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || - CallConv == CallingConv::Tail; + CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; X86MachineFunctionInfo *X86Info = MF.getInfo(); bool HasNCSR = (CB && isa(CB) && CB->hasFnAttr("no_caller_saved_registers")); @@ -4622,7 +4624,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || - CalleeCC == CallingConv::Tail; + CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -26836,6 +26838,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, case CallingConv::X86_ThisCall: case CallingConv::Fast: case CallingConv::Tail: + case CallingConv::SwiftTail: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 899542cdbb3..0adcd572b2d 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -354,6 +354,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (!HasSSE) return CSR_Win64_NoSSE_SaveList; return CSR_Win64_SaveList; + case CallingConv::SwiftTail: + if (!Is64Bit) + return CSR_32_SaveList; + return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList; case CallingConv::X86_64_SysV: if (CallsEHReturn) return CSR_64EHRet_SaveList; @@ -470,6 +474,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, break; case CallingConv::Win64: return CSR_Win64_RegMask; + case CallingConv::SwiftTail: + if (!Is64Bit) + return CSR_32_RegMask; + return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask; case CallingConv::X86_64_SysV: return CSR_64_RegMask; case CallingConv::X86_INTR: @@ -502,6 +510,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); if (IsSwiftCC) return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask; + return IsWin64 ? 
CSR_Win64_RegMask : CSR_64_RegMask; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e6a5be7cf63..7e53eb172a6 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -889,6 +889,7 @@ public: case CallingConv::Fast: case CallingConv::Tail: case CallingConv::Swift: + case CallingConv::SwiftTail: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll index 0d08d8d1c9d..9ac8c06acde 100644 --- a/test/Bitcode/compatibility.ll +++ b/test/Bitcode/compatibility.ll @@ -382,6 +382,8 @@ declare preserve_mostcc void @f.preserve_mostcc() ; CHECK: declare preserve_mostcc void @f.preserve_mostcc() declare preserve_allcc void @f.preserve_allcc() ; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare swifttailcc void @f.swifttailcc() +; CHECK: declare swifttailcc void @f.swifttailcc() declare cc64 void @f.cc64() ; CHECK: declare x86_stdcallcc void @f.cc64() declare x86_stdcallcc void @f.x86_stdcallcc() diff --git a/test/CodeGen/AArch64/swift-async.ll b/test/CodeGen/AArch64/swift-async.ll index 0fd0140b845..6042e27408a 100644 --- a/test/CodeGen/AArch64/swift-async.ll +++ b/test/CodeGen/AArch64/swift-async.ll @@ -5,7 +5,7 @@ ; Important details in prologue: ; * x22 is stored just below x29 ; * Enough stack space is allocated for everything -define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" { +define swifttailcc void @simple(i8* swiftasync %ctx) "frame-pointer"="all" { ; CHECK-LABEL: simple: ; CHECK: orr x29, x29, #0x100000000000000 ; CHECK: sub sp, sp, #32 @@ -32,21 +32,20 @@ define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" { ret void } -define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" { +define swifttailcc void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" { ; CHECK-LABEL: more_csrs: ; CHECK: orr x29, x29, #0x100000000000000 -; CHECK: sub sp, sp, #48 -; CHECK: stp x24, x23, [sp, #8] -; CHECK: stp x29, x30, [sp, #32] +; CHECK: str x23, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #16] -; CHECK-NOAUTH: str x22, [sp, #24] -; CHECK-AUTH: add x16, sp, #24 +; CHECK-NOAUTH: str x22, [sp, #8] +; CHECK-AUTH: add x16, sp, #8 ; CHECK-AUTH: movk x16, #49946, lsl #48 ; CHECK-AUTH: mov x17, x22 ; CHECK-AUTH: pacdb x17, x16 -; CHECK-AUTH: str x17, [sp, #24] +; CHECK-AUTH: str x17, [sp, #8] -; CHECK: add x29, sp, #32 +; CHECK: add x29, sp, #16 ; CHECK: .cfi_def_cfa w29, 16 ; CHECK: .cfi_offset w30, -8 ; CHECK: .cfi_offset w29, -16 @@ -54,15 +53,14 @@ define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" { ; [...] 
-; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x24, x23, [sp, #8] +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: ldr x23, [sp], #32 ; CHECK: and x29, x29, #0xefffffffffffffff -; CHECK: add sp, sp, #48 call void asm sideeffect "", "~{x23}"() ret void } -define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" { +define swifttailcc void @locals(i8* swiftasync %ctx) "frame-pointer"="all" { ; CHECK-LABEL: locals: ; CHECK: orr x29, x29, #0x100000000000000 ; CHECK: sub sp, sp, #64 @@ -93,7 +91,7 @@ define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" { ret void } -define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" { +define swifttailcc void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" { ; CHECK-LABEL: use_input_context: ; CHECK-NOAUTH: str x22, [sp @@ -106,7 +104,7 @@ define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"=" ret void } -define i8** @context_in_func() "frame-pointer"="non-leaf" { +define swifttailcc i8** @context_in_func() "frame-pointer"="non-leaf" { ; CHECK-LABEL: context_in_func: ; CHECK-NOAUTH: str xzr, [sp, #8] @@ -120,7 +118,7 @@ define i8** @context_in_func() "frame-pointer"="non-leaf" { ret i8** %ptr } -define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" { +define swifttailcc void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" { ; CHECK-LABEL: write_frame_context: ; CHECK: sub x[[ADDR:[0-9]+]], x29, #8 ; CHECK: str x0, [x[[ADDR]]] @@ -129,29 +127,48 @@ define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointe ret void } -define void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" { +define swifttailcc void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" { ; CHECK-LABEL: simple_fp_elim: ; CHECK-NOT: orr x29, x29, #0x100000000000000 ret void } -define void @large_frame(i8* swiftasync %ctx) "frame-pointer"="all" { +define swifttailcc void @large_frame(i8* swiftasync %ctx) "frame-pointer"="all" { ; CHECK-LABEL: large_frame: -; CHECK: sub sp, sp, #48 -; CHECK: stp x28, x27, [sp, #8] -; CHECK: stp x29, x30, [sp, #32] -; CHECK-NOAUTH: str x22, [sp, #24] -; CHECK: add x29, sp, #32 +; CHECK: str x28, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #16] +; CHECK-NOAUTH: str x22, [sp, #8] +; CHECK: add x29, sp, #16 ; CHECK: sub sp, sp, #1024 ; [...] ; CHECK: add sp, sp, #1024 -; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x28, x27, [sp, #8] +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: ldr x28, [sp], #32 ; CHECK: ret %var = alloca i8, i32 1024 ret void } -declare void @bar(i32*) +; Important point is that there is just one 8-byte gap in the CSR region (right +; now just above d8) to realign the stack. +define swifttailcc void @two_unpaired_csrs(i8* swiftasync) "frame-pointer"="all" { +; CHECK-LABEL: two_unpaired_csrs: +; CHECK: str d8, [sp, #-48]! 
+; CHECK: str x19, [sp, #16] +; CHECK: stp x29, x30, [sp, #32] +; CHECK-NOAUTH: str x22, [sp, #24] +; CHECK: add x29, sp, #32 + +; CHECK: .cfi_def_cfa w29, 16 +; CHECK: .cfi_offset w30, -8 +; CHECK: .cfi_offset w29, -16 +; CHECK: .cfi_offset w19, -32 +; CHECK: .cfi_offset b8, -48 + + call void asm "","~{x19},~{d8}"() + call swifttailcc void @bar(i32* undef) + ret void +} +declare swifttailcc void @bar(i32*) declare i8** @llvm.swift.async.context.addr() diff --git a/test/CodeGen/AArch64/swifttail-async.ll b/test/CodeGen/AArch64/swifttail-async.ll new file mode 100644 index 00000000000..2004b6107bd --- /dev/null +++ b/test/CodeGen/AArch64/swifttail-async.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - | FileCheck %s + + +declare swifttailcc void @swifttail_callee() +define swifttailcc void @swifttail() { +; CHECK-LABEL: swifttail: +; CHECK-NOT: ld{{.*}}x22 + call void asm "","~{x22}"() + tail call swifttailcc void @swifttail_callee() + ret void +} + +define swifttailcc void @no_preserve_swiftself() { +; CHECK-LABEL: no_preserve_swiftself: +; CHECK-NOT: ld{{.*}}x20 + call void asm "","~{x20}"() + ret void +} diff --git a/test/CodeGen/AArch64/swifttail-call.ll b/test/CodeGen/AArch64/swifttail-call.ll new file mode 100644 index 00000000000..70c5a58f412 --- /dev/null +++ b/test/CodeGen/AArch64/swifttail-call.ll @@ -0,0 +1,230 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON +; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON + +declare swifttailcc void @callee_stack0() +declare swifttailcc void @callee_stack8([8 x i64], i64) +declare swifttailcc void @callee_stack16([8 x i64], i64, i64) +declare extern_weak swifttailcc void @callee_weak() + +define swifttailcc void @caller_to0_from0() nounwind { +; COMMON-LABEL: caller_to0_from0: +; COMMON-NEXT: // %bb. + + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON-NEXT: b callee_stack0 +} + +define swifttailcc void @caller_to0_from8([8 x i64], i64) { +; COMMON-LABEL: caller_to0_from8: + + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON: add sp, sp, #16 +; COMMON-NEXT: b callee_stack0 +} + +define swifttailcc void @caller_to8_from0() { +; COMMON-LABEL: caller_to8_from0: + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #-16]! +; COMMON-NEXT: b callee_stack8 +} + +define swifttailcc void @caller_to8_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to8_from8: +; COMMON-NOT: sub sp, + +; Key point is that the "%a" should go where at SP on entry. + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp] +; COMMON-NEXT: b callee_stack8 +} + +define swifttailcc void @caller_to16_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to16_from8: +; COMMON-NOT: sub sp, + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. If it tries to go below incoming-SP then the +; callee will not deallocate the space, even in swifttailcc. 
+ tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) + +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; COMMON-NEXT: b callee_stack16 + ret void +} + + +define swifttailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { +; COMMON-LABEL: caller_to8_from24: +; COMMON-NOT: sub sp, + +; Key point is that the "%a" should go where at #16 above SP on entry. + tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON-NEXT: b callee_stack8 +} + + +define swifttailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { +; COMMON-LABEL: caller_to16_from16: +; COMMON-NOT: sub sp, + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) + ret void + +; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; COMMON-NEXT: b callee_stack16 +} + +define swifttailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" { +; COMMON-LABEL: disable_tail_calls: +; COMMON-NEXT: // %bb. + + tail call swifttailcc void @callee_stack0() + ret void + +; COMMON: bl callee_stack0 +; COMMON: ret +} + +; Weakly-referenced extern functions cannot be tail-called, as AAELF does +; not define the behaviour of branch instructions to undefined weak symbols. +define swifttailcc void @caller_weak() { +; COMMON-LABEL: caller_weak: +; COMMON: bl callee_weak + tail call void @callee_weak() + ret void +} + +declare { [2 x float] } @get_vec2() + +define { [3 x float] } @test_add_elem() { +; SDAG-LABEL: test_add_elem: +; SDAG: bl get_vec2 +; SDAG: fmov s2, #1.0 +; SDAG: ret +; GISEL-LABEL: test_add_elem: +; GISEL: str x30, [sp, #-16]! +; GISEL: bl get_vec2 +; GISEL: fmov s2, #1.0 +; GISEL: ldr x30, [sp], #16 +; GISEL: ret + + %call = tail call { [2 x float] } @get_vec2() + %arr = extractvalue { [2 x float] } %call, 0 + %arr.0 = extractvalue [2 x float] %arr, 0 + %arr.1 = extractvalue [2 x float] %arr, 1 + + %res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0 + %res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1 + %res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2 + ret { [3 x float] } %res.012 +} + +declare double @get_double() +define { double, [2 x double] } @test_mismatched_insert() { +; COMMON-LABEL: test_mismatched_insert: +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: bl get_double +; COMMON: ret + + %val0 = call double @get_double() + %val1 = call double @get_double() + %val2 = tail call double @get_double() + + %res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0 + %res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0 + %res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1 + + ret { double, [2 x double] } %res.012 +} + +define void @fromC_totail() { +; COMMON-LABEL: fromC_totail: +; COMMON: sub sp, sp, #48 + +; COMMON-NOT: sub sp, +; COMMON: mov w[[TMP:[0-9]+]], #42 +; COMMON: str x[[TMP]], [sp] +; COMMON: bl callee_stack8 + ; We must reset the stack to where it was before the call by undoing its extra stack pop. +; COMMON: str x[[TMP]], [sp, #-16]! 
+; COMMON: bl callee_stack8 +; COMMON: sub sp, sp, #16 + + call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void +} + +define void @fromC_totail_noreservedframe(i32 %len) { +; COMMON-LABEL: fromC_totail_noreservedframe: +; COMMON: stp x29, x30, [sp, #-48]! + +; COMMON: mov w[[TMP:[0-9]+]], #42 + ; Note stack is subtracted here to allocate space for arg +; COMMON: str x[[TMP]], [sp, #-16]! +; COMMON: bl callee_stack8 + ; And here. +; COMMON: str x[[TMP]], [sp, #-16]! +; COMMON: bl callee_stack8 + ; But not restored here because callee_stack8 did that for us. +; COMMON-NOT: sub sp, + + ; Variable sized allocation prevents reserving frame at start of function so each call must allocate any stack space it needs. + %var = alloca i32, i32 %len + + call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + call swifttailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void +} + +declare void @Ccallee_stack8([8 x i64], i64) + +define swifttailcc void @fromtail_toC() { +; COMMON-LABEL: fromtail_toC: +; COMMON: sub sp, sp, #32 + +; COMMON-NOT: sub sp, +; COMMON: mov w[[TMP:[0-9]+]], #42 +; COMMON: str x[[TMP]], [sp] +; COMMON: bl Ccallee_stack8 + ; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything. +; COMMON-NOT: add sp, +; COMMON-NOT: sub sp, +; COMMON: str x[[TMP]], [sp]{{$}} +; COMMON: bl Ccallee_stack8 +; COMMON-NOT: sub sp, + + + call void @Ccallee_stack8([8 x i64] undef, i64 42) + call void @Ccallee_stack8([8 x i64] undef, i64 42) + ret void +} + +declare swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure) +define swiftcc i8* @CallSwiftSelf(i8* swiftself %closure, i8* %context) { +; CHECK-LABEL: CallSwiftSelf: +; CHECK: stp x20 + ;call void asm "","~{r13}"() ; We get a push r13 but why not with the call + ; below? + %res = call swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure) + ret i8* %res +} diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index 87dedf4457f..537754f4526 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -28,39 +28,38 @@ define fastcc void @caller_to0_from8([8 x i64], i64) { define fastcc void @caller_to8_from0() { ; COMMON-LABEL: caller_to8_from0: -; COMMON: sub sp, sp, #32 ; Key point is that the "42" should go #16 below incoming stack ; pointer (we didn't have arg space to reuse). tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void -; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON: str {{x[0-9]+}}, [sp, #-16]! ; COMMON-NEXT: b callee_stack8 } define fastcc void @caller_to8_from8([8 x i64], i64 %a) { ; COMMON-LABEL: caller_to8_from8: -; COMMON: sub sp, sp, #16 +; COMMON-NOT: sub sp, ; Key point is that the "%a" should go where at SP on entry. tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void -; COMMON: str {{x[0-9]+}}, [sp, #16]! +; COMMON: str {{x[0-9]+}}, [sp] ; COMMON-NEXT: b callee_stack8 } define fastcc void @caller_to16_from8([8 x i64], i64 %a) { ; COMMON-LABEL: caller_to16_from8: -; COMMON: sub sp, sp, #16 +; COMMON-NOT: sub sp, ; Important point is that the call reuses the "dead" argument space ; above %a on the stack. If it tries to go below incoming-SP then the ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) -; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! 
+; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] ; COMMON-NEXT: b callee_stack16 ret void } @@ -68,28 +67,28 @@ define fastcc void @caller_to16_from8([8 x i64], i64 %a) { define fastcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; COMMON-LABEL: caller_to8_from24: -; COMMON: sub sp, sp, #16 +; COMMON-NOT: sub sp, ; Key point is that the "%a" should go where at #16 above SP on entry. tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void -; COMMON: str {{x[0-9]+}}, [sp, #32]! +; COMMON: str {{x[0-9]+}}, [sp, #16]! ; COMMON-NEXT: b callee_stack8 } define fastcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { ; COMMON-LABEL: caller_to16_from16: -; COMMON: sub sp, sp, #16 +; COMMON-NOT: sub sp, ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. tail call fastcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) ret void -; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! +; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] ; COMMON-NEXT: b callee_stack16 } diff --git a/test/CodeGen/AArch64/tailcc-tail-call.ll b/test/CodeGen/AArch64/tailcc-tail-call.ll new file mode 100644 index 00000000000..57b9f8e1f72 --- /dev/null +++ b/test/CodeGen/AArch64/tailcc-tail-call.ll @@ -0,0 +1,225 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=SDAG,COMMON +; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=GISEL,COMMON + +declare tailcc void @callee_stack0() +declare tailcc void @callee_stack8([8 x i64], i64) +declare tailcc void @callee_stack16([8 x i64], i64, i64) +declare extern_weak tailcc void @callee_weak() + +define tailcc void @caller_to0_from0() nounwind { +; COMMON-LABEL: caller_to0_from0: +; COMMON-NEXT: // %bb. + + tail call tailcc void @callee_stack0() + ret void + +; COMMON-NEXT: b callee_stack0 +} + +define tailcc void @caller_to0_from8([8 x i64], i64) { +; COMMON-LABEL: caller_to0_from8: + + tail call tailcc void @callee_stack0() + ret void + +; COMMON: add sp, sp, #16 +; COMMON-NEXT: b callee_stack0 +} + +define tailcc void @caller_to8_from0() "frame-pointer"="all"{ +; COMMON-LABEL: caller_to8_from0: + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call tailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [x29, #16] +; COMMON: ldp x29, x30, [sp], #16 + ; If there is a sub here then the 42 will be briefly exposed to corruption + ; from an interrupt if the kernel does not honour a red-zone, and a larger + ; call could well overflow the red zone even if it is present. +; COMMON-NOT: sub sp, +; COMMON-NEXT: b callee_stack8 +} + +define tailcc void @caller_to8_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to8_from8: +; COMMON-NOT: sub sp, + +; Key point is that the "%a" should go where at SP on entry. + tail call tailcc void @callee_stack8([8 x i64] undef, i64 42) + ret void + +; COMMON: str {{x[0-9]+}}, [sp] +; COMMON-NEXT: b callee_stack8 +} + +define tailcc void @caller_to16_from8([8 x i64], i64 %a) { +; COMMON-LABEL: caller_to16_from8: +; COMMON-NOT: sub sp, + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. 
+; callee will not deallocate the space, even in tailcc.
+  tail call tailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
+
+; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
+; COMMON-NEXT: b callee_stack16
+  ret void
+}
+
+
+define tailcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) {
+; COMMON-LABEL: caller_to8_from24:
+; COMMON-NOT: sub sp,
+
+; Key point is that the "%a" should go at #16 above SP on entry.
+  tail call tailcc void @callee_stack8([8 x i64] undef, i64 42)
+  ret void
+
+; COMMON: str {{x[0-9]+}}, [sp, #16]!
+; COMMON-NEXT: b callee_stack8
+}
+
+
+define tailcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) {
+; COMMON-LABEL: caller_to16_from16:
+; COMMON-NOT: sub sp,
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+  tail call tailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
+  ret void
+
+; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
+; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
+; COMMON-NEXT: b callee_stack16
+}
+
+define tailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
+; COMMON-LABEL: disable_tail_calls:
+; COMMON-NEXT: // %bb.
+
+  tail call tailcc void @callee_stack0()
+  ret void
+
+; COMMON: bl callee_stack0
+; COMMON: ret
+}
+
+; Weakly-referenced extern functions cannot be tail-called, as AAELF does
+; not define the behaviour of branch instructions to undefined weak symbols.
+define tailcc void @caller_weak() {
+; COMMON-LABEL: caller_weak:
+; COMMON: bl callee_weak
+  tail call void @callee_weak()
+  ret void
+}
+
+declare { [2 x float] } @get_vec2()
+
+define { [3 x float] } @test_add_elem() {
+; SDAG-LABEL: test_add_elem:
+; SDAG: bl get_vec2
+; SDAG: fmov s2, #1.0
+; SDAG: ret
+; GISEL-LABEL: test_add_elem:
+; GISEL: str x30, [sp, #-16]!
+; GISEL: bl get_vec2
+; GISEL: fmov s2, #1.0
+; GISEL: ldr x30, [sp], #16
+; GISEL: ret
+
+  %call = tail call { [2 x float] } @get_vec2()
+  %arr = extractvalue { [2 x float] } %call, 0
+  %arr.0 = extractvalue [2 x float] %arr, 0
+  %arr.1 = extractvalue [2 x float] %arr, 1
+
+  %res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0
+  %res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1
+  %res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2
+  ret { [3 x float] } %res.012
+}
+
+declare double @get_double()
+define { double, [2 x double] } @test_mismatched_insert() {
+; COMMON-LABEL: test_mismatched_insert:
+; COMMON: bl get_double
+; COMMON: bl get_double
+; COMMON: bl get_double
+; COMMON: ret
+
+  %val0 = call double @get_double()
+  %val1 = call double @get_double()
+  %val2 = tail call double @get_double()
+
+  %res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0
+  %res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0
+  %res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1
+
+  ret { double, [2 x double] } %res.012
+}
+
+define void @fromC_totail() {
+; COMMON-LABEL: fromC_totail:
+; COMMON: sub sp, sp, #32
+
+; COMMON-NOT: sub sp,
+; COMMON: mov w[[TMP:[0-9]+]], #42
+; COMMON: str x[[TMP]], [sp]
+; COMMON: bl callee_stack8
+  ; We must reset the stack to where it was before the call by undoing the
+  ; callee's extra stack pop.
+; COMMON: str x[[TMP]], [sp, #-16]!
+; COMMON: bl callee_stack8
+; COMMON: sub sp, sp, #16
+
+  call tailcc void @callee_stack8([8 x i64] undef, i64 42)
+  call tailcc void @callee_stack8([8 x i64] undef, i64 42)
+  ret void
+}
+
+define void @fromC_totail_noreservedframe(i32 %len) {
+; COMMON-LABEL: fromC_totail_noreservedframe:
+; COMMON: stp x29, x30, [sp, #-32]!
+
+; COMMON: mov w[[TMP:[0-9]+]], #42
+  ; Note that the stack pointer is decremented here to allocate space for the argument.
+; COMMON: str x[[TMP]], [sp, #-16]!
+; COMMON: bl callee_stack8
+  ; And here.
+; COMMON: str x[[TMP]], [sp, #-16]!
+; COMMON: bl callee_stack8
+  ; But not restored here because callee_stack8 did that for us.
+; COMMON-NOT: sub sp,
+
+  ; A variable-sized allocation prevents reserving the frame at the start of
+  ; the function, so each call must allocate any stack space it needs.
+  %var = alloca i32, i32 %len
+
+  call tailcc void @callee_stack8([8 x i64] undef, i64 42)
+  call tailcc void @callee_stack8([8 x i64] undef, i64 42)
+  ret void
+}
+
+declare void @Ccallee_stack8([8 x i64], i64)
+
+define tailcc void @fromtail_toC() {
+; COMMON-LABEL: fromtail_toC:
+; COMMON: sub sp, sp, #32
+
+; COMMON-NOT: sub sp,
+; COMMON: mov w[[TMP:[0-9]+]], #42
+; COMMON: str x[[TMP]], [sp]
+; COMMON: bl Ccallee_stack8
+  ; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything.
+; COMMON-NOT: add sp,
+; COMMON-NOT: sub sp,
+; COMMON: str x[[TMP]], [sp]{{$}}
+; COMMON: bl Ccallee_stack8
+; COMMON-NOT: sub sp,
+
+
+  call void @Ccallee_stack8([8 x i64] undef, i64 42)
+  call void @Ccallee_stack8([8 x i64] undef, i64 42)
+  ret void
+}
diff --git a/test/CodeGen/X86/swifttail-async-i386.ll b/test/CodeGen/X86/swifttail-async-i386.ll
new file mode 100644
index 00000000000..60a8ae8fe07
--- /dev/null
+++ b/test/CodeGen/X86/swifttail-async-i386.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=i386-apple-darwin %s -o - | FileCheck %s
+
+declare void @clobber()
+
+declare swifttailcc void @swifttail_callee()
+define swifttailcc void @swifttail() {
+; CHECK-LABEL: swifttail:
+; CHECK-NOT: %rbx
+  call void @clobber()
+  tail call swifttailcc void @swifttail_callee()
+  ret void
+}
+
+declare swifttailcc void @swiftself(i8* swiftself)
+
+define swifttailcc void @swifttail2(i8* %arg) {
+; CHECK-LABEL: swifttail2:
+; CHECK: movl {{.*}}, %ecx
+; CHECK: jmp _swiftself
+  tail call swifttailcc void @swiftself(i8* swiftself %arg)
+  ret void
+}
diff --git a/test/CodeGen/X86/swifttail-async.ll b/test/CodeGen/X86/swifttail-async.ll
new file mode 100644
index 00000000000..14c4e0d95fd
--- /dev/null
+++ b/test/CodeGen/X86/swifttail-async.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+
+
+declare swifttailcc void @swifttail_callee()
+define swifttailcc void @swifttail() {
+; CHECK-LABEL: swifttail:
+; CHECK-NOT: popq %r14
+  call void asm "","~{r14}"()
+  tail call swifttailcc void @swifttail_callee()
+  ret void
+}
+
+define swifttailcc void @no_preserve_swiftself() {
+; CHECK-LABEL: no_preserve_swiftself:
+; CHECK-NOT: popq %r13
+  call void asm "","~{r13}"()
+  ret void
+}
+
+declare swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
+define swiftcc i8* @CallSwiftSelf(i8* swiftself %closure, i8* %context) {
+; CHECK-LABEL: CallSwiftSelf:
+; CHECK: pushq %r13
+  ;call void asm "","~{r13}"() ; We get a push r13 but why not with the call
+  ; below?
+ %res = call swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure) + ret i8* %res +} diff --git a/test/CodeGen/X86/swifttail-return.ll b/test/CodeGen/X86/swifttail-return.ll new file mode 100644 index 00000000000..8bea68302b8 --- /dev/null +++ b/test/CodeGen/X86/swifttail-return.ll @@ -0,0 +1,29 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-unknown-unknown -O0 | FileCheck %s + +define swifttailcc [4 x i64] @return_int() { +; CHECK-LABEL: return_int: +; CHECK-DAG: movl $1, %eax +; CHECK-DAG: movl $2, %edx +; CHECK-DAG: movl $3, %ecx +; CHECK-DAG: movl $4, %r8d + + ret [4 x i64] [i64 1, i64 2, i64 3, i64 4] +} + + +; CHECK: [[ONE:.LCPI.*]]: +; CHECK-NEXT: # double 1 +; CHECK: [[TWO:.LCPI.*]]: +; CHECK-NEXT: # double 2 +; CHECK: [[THREE:.LCPI.*]]: +; CHECK-NEXT: # double 3 + +define swifttailcc [4 x double] @return_float() { +; CHECK-LABEL: return_float: +; CHECK-DAG: movsd [[ONE]](%rip), %xmm1 +; CHECK-DAG: movsd [[TWO]](%rip), %xmm2 +; CHECK-DAG: movsd [[THREE]](%rip), %xmm3 +; CHECK-DAG: xorps %xmm0, %xmm0 + ret [4 x double] [double 0.0, double 1.0, double 2.0, double 3.0] +} diff --git a/test/CodeGen/X86/tailcall-swifttailcc.ll b/test/CodeGen/X86/tailcall-swifttailcc.ll new file mode 100644 index 00000000000..6c92280d01e --- /dev/null +++ b/test/CodeGen/X86/tailcall-swifttailcc.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +declare dso_local swifttailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) + +define dso_local swifttailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +; CHECK-LABEL: tailcaller: +; CHECK-NOT: subq +; CHECK-NOT: addq +; CHECK: jmp tailcallee +entry: + %tmp11 = tail call swifttailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare dso_local swifttailcc i8* @alias_callee() + +define swifttailcc noalias i8* @noalias_caller() nounwind { +; CHECK-LABEL: noalias_caller: +; CHECK: jmp alias_callee + %p = musttail call swifttailcc i8* @alias_callee() + ret i8* %p +} + +declare dso_local swifttailcc noalias i8* @noalias_callee() + +define dso_local swifttailcc i8* @alias_caller() nounwind { +; CHECK-LABEL: alias_caller: +; CHECK: jmp noalias_callee # TAILCALL + %p = tail call swifttailcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare dso_local swifttailcc i32 @i32_callee() + +define dso_local swifttailcc i32 @ret_undef() nounwind { +; CHECK-LABEL: ret_undef: +; CHECK: jmp i32_callee # TAILCALL + %p = tail call swifttailcc i32 @i32_callee() + ret i32 undef +} + +declare dso_local swifttailcc void @does_not_return() + +define dso_local swifttailcc i32 @noret() nounwind { +; CHECK-LABEL: noret: +; CHECK: jmp does_not_return + tail call swifttailcc void @does_not_return() + unreachable +} + +define dso_local swifttailcc void @void_test(i32, i32, i32, i32) { +; CHECK-LABEL: void_test: +; CHECK: jmp void_test + entry: + tail call swifttailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) + ret void +} + +define dso_local swifttailcc i1 @i1test(i32, i32, i32, i32) { +; CHECK-LABEL: i1test: +; CHECK: jmp i1test + entry: + %4 = tail call swifttailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret i1 %4 +} diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el index 2148de94b91..3ca86ae2cc9 100644 --- a/utils/emacs/llvm-mode.el +++ b/utils/emacs/llvm-mode.el @@ -57,7 +57,7 @@ ;; Calling conventions "ccc" "fastcc" "coldcc" "webkit_jscc" "anyregcc" 
"preserve_mostcc" "preserve_allcc" - "cxx_fast_tlscc" "swiftcc" "tailcc" "cfguard_checkcc" + "cxx_fast_tlscc" "swiftcc" "tailcc" "swifttailcc" "cfguard_checkcc" ;; Visibility styles "default" "hidden" "protected" ;; DLL storages diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim index 0c6eb6600de..684a42558ca 100644 --- a/utils/vim/syntax/llvm.vim +++ b/utils/vim/syntax/llvm.vim @@ -176,6 +176,7 @@ syn keyword llvmKeyword \ strictfp \ swiftcc \ swifterror + \ swifttailcc \ swiftself \ syncscope \ tail