diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index c8871a1f3e6..84364dbb230 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -3084,7 +3084,9 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) { if (Attrs.hasParamAttribute(I, AK)) Copy.addAttribute(AK); } - if (Attrs.hasParamAttribute(I, Attribute::Alignment)) + // `align` is ABI-affecting only in combination with `byval`. + if (Attrs.hasParamAttribute(I, Attribute::Alignment) && + Attrs.hasParamAttribute(I, Attribute::ByVal)) Copy.addAlignmentAttr(Attrs.getParamAlignment(I)); return Copy; } diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index da8eb36e724..55cca12acd0 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -61,14 +61,14 @@ void Lowerer::lowerResumeOrDestroy(CallSite CS, // TODO: Handle the case when coroutine promise alloca has align override. void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) { Value *Operand = Intrin->getArgOperand(0); - unsigned Alignement = Intrin->getAlignment(); + Align Alignment = Intrin->getAlignment(); Type *Int8Ty = Builder.getInt8Ty(); auto *SampleStruct = StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty}); const DataLayout &DL = TheModule.getDataLayout(); int64_t Offset = alignTo( - DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignement); + DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment); if (Intrin->isFromPromise()) Offset = -Offset; diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index ecf5450a186..e89c890386b 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -34,7 +34,8 @@ struct Lowerer : coro::LowererBase { Lowerer(Module &M) : LowererBase(M) {} - void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA); + void elideHeapAllocations(Function *F, uint64_t FrameSize, 
+ MaybeAlign FrameAlign, AAResults &AA); bool shouldElide(Function *F, DominatorTree &DT) const; void collectPostSplitCoroIds(Function *F); bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT); @@ -92,10 +93,23 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { } } -// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type. -static Type *getFrameType(Function *Resume) { - auto *ArgType = Resume->arg_begin()->getType(); - return cast(ArgType)->getElementType(); +// Given a resume function @f.resume(%f.frame* %frame), returns the size +// and expected alignment of %f.frame type. +static std::pair getFrameLayout(Function *Resume) { + // Prefer to pull information from the function attributes. + auto Size = Resume->getParamDereferenceableBytes(0); + auto Align = Resume->getParamAlign(0); + + // If those aren't given, extract them from the type. + if (Size == 0 || !Align) { + auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType(); + + const DataLayout &DL = Resume->getParent()->getDataLayout(); + if (!Size) Size = DL.getTypeAllocSize(FrameTy); + if (!Align) Align = DL.getABITypeAlign(FrameTy); + } + + return std::make_pair(Size, Align); } // Finds first non alloca instruction in the entry block of a function. @@ -108,8 +122,9 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { // To elide heap allocations we need to suppress code blocks guarded by // llvm.coro.alloc and llvm.coro.free instructions. -void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { - LLVMContext &C = FrameTy->getContext(); +void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize, + MaybeAlign FrameAlign, AAResults &AA) { + LLVMContext &C = F->getContext(); auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction()); @@ -130,7 +145,9 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { // here. 
Possibly we will need to do a mini SROA here and break the coroutine // frame into individual AllocaInst recreating the original alignment. const DataLayout &DL = F->getParent()->getDataLayout(); + auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize); auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt); + Frame->setAlignment(FrameAlign); auto *FrameVoidPtr = new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); @@ -319,8 +336,9 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, replaceWithConstant(DestroyAddrConstant, It.second); if (ShouldElide) { - auto *FrameTy = getFrameType(cast(ResumeAddrConstant)); - elideHeapAllocations(CoroId->getFunction(), FrameTy, AA); + auto FrameSizeAndAlign = getFrameLayout(cast(ResumeAddrConstant)); + elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, + FrameSizeAndAlign.second, AA); coro::replaceCoroFree(CoroId, /*Elide=*/true); } diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp index c85f4807758..e166e28cbcd 100644 --- a/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/lib/Transforms/Coroutines/CoroFrame.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/OptimalLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -340,52 +341,182 @@ namespace { // coroutine frame and if the alignment specified on the Alloca instruction // differs from the natural alignment of the alloca type we will need to insert // padding. 
-struct PaddingCalculator { +class FrameTypeBuilder { + struct Field { + uint64_t Size; + uint64_t Offset; + Spill *ForSpill; + Type *Ty; + unsigned FieldIndex; + Align Alignment; + Align TyAlignment; + }; + const DataLayout &DL; LLVMContext &Context; - unsigned StructSize = 0; + uint64_t StructSize = 0; + Align StructAlign; + bool IsFinished = false; - PaddingCalculator(LLVMContext &Context, DataLayout const &DL) - : DL(DL), Context(Context) {} + SmallVector Fields; + DenseMap FieldIndexByKey; - // Replicate the logic from IR/DataLayout.cpp to match field offset - // computation for LLVM structs. - void addType(Type *Ty) { - unsigned TyAlign = DL.getABITypeAlignment(Ty); - if ((StructSize & (TyAlign - 1)) != 0) - StructSize = alignTo(StructSize, TyAlign); +public: + FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) + : DL(DL), Context(Context) {} - StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item. + class FieldId { + size_t Value; + explicit FieldId(size_t Value) : Value(Value) {} + + friend class FrameTypeBuilder; + }; + + /// Add a field to this structure for the storage of an `alloca` + /// instruction. + FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr, + bool IsHeader = false) { + Type *Ty = AI->getAllocatedType(); + + // Make an array type if this is a static array allocation. + if (AI->isArrayAllocation()) { + if (auto *CI = dyn_cast(AI->getArraySize())) + Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); + else + report_fatal_error("Coroutines cannot handle non static allocas yet"); + } + + return addField(Ty, AI->getAlign(), ForSpill, IsHeader); } - void addTypes(SmallVectorImpl const &Types) { - for (auto *Ty : Types) - addType(Ty); + /// Add a field to this structure. 
+ FieldId addField(Type *Ty, MaybeAlign FieldAlignment, + Spill *ForSpill = nullptr, + bool IsHeader = false) { + assert(!IsFinished && "adding fields to a finished builder"); + assert(Ty && "must provide a type for a field"); + + // The field size is always the alloc size of the type. + uint64_t FieldSize = DL.getTypeAllocSize(Ty); + + // The field alignment might not be the type alignment, but we need + // to remember the type alignment anyway to build the type. + Align TyAlignment = DL.getABITypeAlign(Ty); + if (!FieldAlignment) FieldAlignment = TyAlignment; + + // Lay out header fields immediately. + uint64_t Offset; + if (IsHeader) { + Offset = alignTo(StructSize, FieldAlignment); + StructSize = Offset + FieldSize; + + // Everything else has a flexible offset. + } else { + Offset = OptimalLayoutField::FlexibleOffset; + } + + Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0, + *FieldAlignment, TyAlignment}); + return FieldId(Fields.size() - 1); } - unsigned computePadding(Type *Ty, unsigned ForcedAlignment) { - unsigned TyAlign = DL.getABITypeAlignment(Ty); - auto Natural = alignTo(StructSize, TyAlign); - auto Forced = alignTo(StructSize, ForcedAlignment); + /// Finish the layout and set the body on the given type. + void finish(StructType *Ty); - // Return how many bytes of padding we need to insert. - if (Natural != Forced) - return std::max(Natural, Forced) - StructSize; - - // Rely on natural alignment. - return 0; + uint64_t getStructSize() const { + assert(IsFinished && "not yet finished!"); + return StructSize; } - // If padding required, return the padding field type to insert. 
- ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) { - if (auto Padding = computePadding(Ty, ForcedAlignment)) - return ArrayType::get(Type::getInt8Ty(Context), Padding); + Align getStructAlign() const { + assert(IsFinished && "not yet finished!"); + return StructAlign; + } - return nullptr; + unsigned getFieldIndex(FieldId Id) const { + assert(IsFinished && "not yet finished!"); + return Fields[Id.Value].FieldIndex; } }; } // namespace +void FrameTypeBuilder::finish(StructType *Ty) { + assert(!IsFinished && "already finished!"); + + // Prepare the optimal-layout field array. + // The Id in the layout field is a pointer to our Field for it. + SmallVector LayoutFields; + LayoutFields.reserve(Fields.size()); + for (auto &Field : Fields) { + LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment, + Field.Offset); + } + + // Perform layout. + auto SizeAndAlign = performOptimalLayout(LayoutFields); + StructSize = SizeAndAlign.first; + StructAlign = SizeAndAlign.second; + + auto getField = [](const OptimalLayoutField &LayoutField) -> Field & { + return *static_cast(const_cast(LayoutField.Id)); + }; + + // We need to produce a packed struct type if there's a field whose + // assigned offset isn't a multiple of its natural type alignment. + bool Packed = [&] { + for (auto &LayoutField : LayoutFields) { + auto &F = getField(LayoutField); + if (!isAligned(F.TyAlignment, LayoutField.Offset)) + return true; + } + return false; + }(); + + // Build the struct body. + SmallVector FieldTypes; + FieldTypes.reserve(LayoutFields.size() * 3 / 2); + uint64_t LastOffset = 0; + for (auto &LayoutField : LayoutFields) { + auto &F = getField(LayoutField); + + auto Offset = LayoutField.Offset; + + // Add a padding field if there's a padding gap and we're either + // building a packed struct or the padding gap is more than we'd + // get from aligning to the field type's natural alignment. 
+ assert(Offset >= LastOffset); + if (Offset != LastOffset) { + if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) + FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), + Offset - LastOffset)); + } + + // Record the layout information into both the Field and the + // original Spill, if there is one. + F.Offset = Offset; + F.FieldIndex = FieldTypes.size(); + if (F.ForSpill) { + F.ForSpill->setFieldIndex(F.FieldIndex); + } + + FieldTypes.push_back(F.Ty); + LastOffset = Offset + F.Size; + } + + Ty->setBody(FieldTypes, Packed); + +#ifndef NDEBUG + // Check that the IR layout matches the offsets we expect. + auto Layout = DL.getStructLayout(Ty); + for (auto &F : Fields) { + assert(Ty->getElementType(F.FieldIndex) == F.Ty); + assert(Layout->getElementOffset(F.FieldIndex) == F.Offset); + } +#endif + + IsFinished = true; +} + // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; @@ -398,13 +529,17 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SpillInfo &Spills) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getParent()->getDataLayout(); - PaddingCalculator Padder(C, DL); - SmallString<32> Name(F.getName()); - Name.append(".Frame"); - StructType *FrameTy = StructType::create(C, Name); - SmallVector Types; + StructType *FrameTy = [&] { + SmallString<32> Name(F.getName()); + Name.append(".Frame"); + return StructType::create(C, Name); + }(); + + FrameTypeBuilder B(C, DL); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); + Optional PromiseFieldId; + Optional SwitchIndexFieldId; if (Shape.ABI == coro::ABI::Switch) { auto *FramePtrTy = FrameTy->getPointerTo(); @@ -412,74 +547,74 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, /*IsVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); - // Figure out how wide should be an integer type storing the suspend index. + // Add header fields for the resume and destroy functions. 
+ // We can rely on these being perfectly packed. + B.addField(FnPtrTy, None, nullptr, /*header*/ true); + B.addField(FnPtrTy, None, nullptr, /*header*/ true); + + // Add a header field for the promise if there is one. + if (PromiseAlloca) { + PromiseFieldId = + B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true); + } + + // Add a field to store the suspend index. This doesn't need to + // be in the header. unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - Type *PromiseType = PromiseAlloca - ? PromiseAlloca->getType()->getElementType() - : Type::getInt1Ty(C); Type *IndexType = Type::getIntNTy(C, IndexBits); - Types.push_back(FnPtrTy); - Types.push_back(FnPtrTy); - Types.push_back(PromiseType); - Types.push_back(IndexType); + + SwitchIndexFieldId = B.addField(IndexType, None); } else { assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } Value *CurrentDef = nullptr; - Padder.addTypes(Types); - // Create an entry for every spilled value. for (auto &S : Spills) { + // We can have multiple entries in Spills for a single value, but + // they should form a contiguous run. Ignore all but the first. if (CurrentDef == S.def()) continue; CurrentDef = S.def(); - // PromiseAlloca was already added to Types array earlier. - if (CurrentDef == PromiseAlloca) - continue; - uint64_t Count = 1; - Type *Ty = nullptr; + assert(CurrentDef != PromiseAlloca && + "recorded spill use of promise alloca?"); + if (auto *AI = dyn_cast(CurrentDef)) { - Ty = AI->getAllocatedType(); - if (unsigned AllocaAlignment = AI->getAlignment()) { - // If alignment is specified in alloca, see if we need to insert extra - // padding. 
- if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) { - Types.push_back(PaddingTy); - Padder.addType(PaddingTy); - } - } - if (auto *CI = dyn_cast(AI->getArraySize())) - Count = CI->getValue().getZExtValue(); - else - report_fatal_error("Coroutines cannot handle non static allocas yet"); + B.addFieldForAlloca(AI, &S); } else { - Ty = CurrentDef->getType(); + Type *Ty = CurrentDef->getType(); + B.addField(Ty, None, &S); } - S.setFieldIndex(Types.size()); - if (Count == 1) - Types.push_back(Ty); - else - Types.push_back(ArrayType::get(Ty, Count)); - Padder.addType(Ty); } - FrameTy->setBody(Types); + + B.finish(FrameTy); + Shape.FrameAlign = B.getStructAlign(); + Shape.FrameSize = B.getStructSize(); switch (Shape.ABI) { + // In the switch ABI, remember the field indices for the promise and + // switch-index fields. case coro::ABI::Switch: + Shape.SwitchLowering.IndexField = + B.getFieldIndex(*SwitchIndexFieldId); + Shape.SwitchLowering.PromiseField = + (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0); + + // Also round the frame size up to a multiple of its alignment, as is + // generally expected in C/C++. + Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); break; - // Remember whether the frame is inline in the storage. + // In the retcon ABI, remember whether the frame is inline in the storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: { - auto &Layout = F.getParent()->getDataLayout(); auto Id = Shape.getRetconCoroId(); Shape.RetconLowering.IsFrameInlineInStorage - = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() && - Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment()); + = (B.getStructSize() <= Id->getStorageSize() && + B.getStructAlign() <= Id->getStorageAlignment()); break; } } @@ -608,10 +743,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { // we remember allocas and their indices to be handled once we processed // all the spills. 
SmallVector, 4> Allocas; - // Promise alloca (if present) has a fixed field number. + + // Promise alloca (if present) doesn't show in the spills and has a + // special field number. if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { assert(Shape.ABI == coro::ABI::Switch); - Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise); + Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField()); } // Create a GEP with the given index into the coroutine frame for the original diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h index de2d2920cb1..384e20cd0a9 100644 --- a/lib/Transforms/Coroutines/CoroInstr.h +++ b/lib/Transforms/Coroutines/CoroInstr.h @@ -211,8 +211,8 @@ public: return cast(getArgOperand(SizeArg))->getZExtValue(); } - uint64_t getStorageAlignment() const { - return cast(getArgOperand(AlignArg))->getZExtValue(); + Align getStorageAlignment() const { + return Align(cast(getArgOperand(AlignArg))->getZExtValue()); } Value *getStorage() const { @@ -338,11 +338,16 @@ class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst { enum { FrameArg, AlignArg, FromArg }; public: + /// Are we translating from the frame to the promise (false) or from + /// the promise to the frame (true)? bool isFromPromise() const { return cast(getArgOperand(FromArg))->isOneValue(); } - unsigned getAlignment() const { - return cast(getArgOperand(AlignArg))->getZExtValue(); + + /// The required alignment of the promise. This must match the + /// alignment of the promise alloca in the coroutine. 
+ Align getAlignment() const { + return Align(cast(getArgOperand(AlignArg))->getZExtValue()); } // Methods to support type inquiry through isa, cast, and dyn_cast: diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h index 7eb35400c0d..bd76e93c912 100644 --- a/lib/Transforms/Coroutines/CoroInternal.h +++ b/lib/Transforms/Coroutines/CoroInternal.h @@ -96,17 +96,22 @@ struct LLVM_LIBRARY_VISIBILITY Shape { struct SwitchFieldIndex { enum { Resume, - Destroy, - Promise, - Index, - /// The index of the first spill field. - FirstSpill + Destroy + + // The promise field is always at a fixed offset from the start of + // frame given its type, but the index isn't a constant for all + // possible frames. + + // The switch-index field isn't at a fixed offset or index, either; + // we just work it in where it fits best. }; }; coro::ABI ABI; StructType *FrameTy; + Align FrameAlign; + uint64_t FrameSize; Instruction *FramePtr; BasicBlock *AllocaSpillBlock; @@ -114,6 +119,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape { SwitchInst *ResumeSwitch; AllocaInst *PromiseAlloca; BasicBlock *ResumeEntryBlock; + unsigned IndexField; + unsigned PromiseField; bool HasFinalSuspend; }; @@ -141,10 +148,15 @@ struct LLVM_LIBRARY_VISIBILITY Shape { return cast(CoroBegin->getId()); } + unsigned getSwitchIndexField() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + return SwitchLowering.IndexField; + } IntegerType *getIndexType() const { assert(ABI == coro::ABI::Switch); assert(FrameTy && "frame type not assigned"); - return cast(FrameTy->getElementType(SwitchFieldIndex::Index)); + return cast(FrameTy->getElementType(getSwitchIndexField())); } ConstantInt *getIndex(uint64_t Value) const { return ConstantInt::get(getIndexType(), Value); @@ -203,23 +215,17 @@ struct LLVM_LIBRARY_VISIBILITY Shape { llvm_unreachable("Unknown coro::ABI enum"); } - unsigned getFirstSpillFieldIndex() const { - switch (ABI) { - case 
coro::ABI::Switch: - return SwitchFieldIndex::FirstSpill; - - case coro::ABI::Retcon: - case coro::ABI::RetconOnce: - return 0; - } - llvm_unreachable("Unknown coro::ABI enum"); - } - AllocaInst *getPromiseAlloca() const { if (ABI == coro::ABI::Switch) return SwitchLowering.PromiseAlloca; return nullptr; } + unsigned getPromiseField() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + assert(SwitchLowering.PromiseAlloca && "no promise alloca"); + return SwitchLowering.PromiseField; + } /// Allocate memory according to the rules of the active lowering. /// diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 465b6598da6..8a47f89fd4d 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -285,7 +285,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; auto *GepIndex = Builder.CreateStructGEP( - FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); + FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); @@ -311,7 +311,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.CreateStore(NullPtr, GepIndex); } else { auto *GepIndex = Builder.CreateStructGEP( - FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); + FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); @@ -645,6 +645,17 @@ Value *CoroCloner::deriveNewFramePointer() { llvm_unreachable("bad ABI"); } +static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, + unsigned ParamIndex, + uint64_t Size, Align Alignment) { + AttrBuilder ParamAttrs; + 
ParamAttrs.addAttribute(Attribute::NonNull); + ParamAttrs.addAttribute(Attribute::NoAlias); + ParamAttrs.addAlignmentAttr(Alignment); + ParamAttrs.addDereferenceableAttr(Size); + Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); +} + /// Clone the body of the original function into a resume function of /// some sort. void CoroCloner::create() { @@ -693,6 +704,9 @@ void CoroCloner::create() { // original function. This should include optimization settings and so on. NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, OrigAttrs.getFnAttributes()); + + addFramePointerAttrs(NewAttrs, Context, 0, + Shape.FrameSize, Shape.FrameAlign); break; case coro::ABI::Retcon: @@ -700,13 +714,13 @@ void CoroCloner::create() { // If we have a continuation prototype, just use its attributes, // full-stop. NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + + addFramePointerAttrs(NewAttrs, Context, 0, + Shape.getRetconCoroId()->getStorageSize(), + Shape.getRetconCoroId()->getStorageAlignment()); break; } - // Make the frame parameter nonnull and noalias. - NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); - NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); - switch (Shape.ABI) { // In these ABIs, the cloned functions always return 'void', and the // existing return sites are meaningless. Note that for unique @@ -1002,8 +1016,8 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) { coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); if (AllocInst) { IRBuilder<> Builder(AllocInst); - // FIXME: Need to handle overaligned members. auto *Frame = Builder.CreateAlloca(Shape.FrameTy); + Frame->setAlignment(Shape.FrameAlign); auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); AllocInst->replaceAllUsesWith(Builder.getFalse()); AllocInst->eraseFromParent(); @@ -1237,6 +1251,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape, // Allocate. 
We don't need to update the call graph node because we're // going to recompute it from scratch after splitting. + // FIXME: pass the required alignment RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); RawFramePtr = Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); diff --git a/test/Transforms/Coroutines/ArgAddr.ll b/test/Transforms/Coroutines/ArgAddr.ll index 81cfb7db06a..a1cac168ac4 100644 --- a/test/Transforms/Coroutines/ArgAddr.ll +++ b/test/Transforms/Coroutines/ArgAddr.ll @@ -1,7 +1,7 @@ ; Need to move users of allocas that were moved into the coroutine frame after ; coro.begin. -; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='default' -enable-coroutines -S | FileCheck %s +; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -O2 -enable-coroutines -S | FileCheck %s +; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -aa-pipeline=basic-aa -passes='default' -enable-coroutines -S | FileCheck %s define nonnull i8* @f(i32 %n) { entry: diff --git a/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll b/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll index 48bf0c52593..f29b5a062ad 100644 --- a/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll +++ b/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll @@ -30,11 +30,11 @@ suspend: } ; See if %this was added to the frame -; CHECK: %f_copy.Frame = type { void (%f_copy.Frame*)*, void (%f_copy.Frame*)*, i1, i1, i64 } +; CHECK: %f_copy.Frame = type { void (%f_copy.Frame*)*, void (%f_copy.Frame*)*, i64, i1 } ; See that %this is spilled into the frame ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg) -; CHECK: %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i64 %this_arg, i64* %this.spill.addr ; CHECK: ret i8* %hdl 
diff --git a/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll b/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll index bd95f4b9672..b3c4558a47b 100644 --- a/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll +++ b/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll @@ -27,11 +27,11 @@ suspend: } ; See if %this was added to the frame -; CHECK: %f_direct.Frame = type { void (%f_direct.Frame*)*, void (%f_direct.Frame*)*, i1, i1, i64 } +; CHECK: %f_direct.Frame = type { void (%f_direct.Frame*)*, void (%f_direct.Frame*)*, i64, i1 } ; See that %this is spilled into the frame ; CHECK-LABEL: define i8* @f_direct(i64 %this) -; CHECK: %this.spill.addr = getelementptr inbounds %f_direct.Frame, %f_direct.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %this.spill.addr = getelementptr inbounds %f_direct.Frame, %f_direct.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i64 %this, i64* %this.spill.addr ; CHECK: ret i8* %hdl diff --git a/test/Transforms/Coroutines/coro-catchswitch.ll b/test/Transforms/Coroutines/coro-catchswitch.ll index 0eb4b1ee64e..79ce5062a20 100644 --- a/test/Transforms/Coroutines/coro-catchswitch.ll +++ b/test/Transforms/Coroutines/coro-catchswitch.ll @@ -32,7 +32,7 @@ catch.dispatch: ; preds = %if.else, %if.then ; CHECK: catch.dispatch: ; CHECK: %val = phi i32 [ 2, %if.else ], [ 1, %if.then ] ; CHECK: %[[Pad:.+]] = cleanuppad within none [] -; CHECK: %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i32 %val, i32* %val.spill.addr ; CHECK: cleanupret from %[[Pad]] unwind label %[[Switch:.+]] diff --git a/test/Transforms/Coroutines/coro-debug.ll b/test/Transforms/Coroutines/coro-debug.ll index e4a71e81088..b624c88f99f 100644 --- a/test/Transforms/Coroutines/coro-debug.ll +++ b/test/Transforms/Coroutines/coro-debug.ll @@ -128,12 +128,12 @@ attributes #7 = { noduplicate } !24 = !DILocation(line: 62, column: 3, 
scope: !6) ; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]] -; CHECK: define internal fastcc void @f.resume(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[RESUME:[0-9]+]] +; CHECK: define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[RESUME:[0-9]+]] ; CHECK: entry.resume: ; CHECK-NEXT: call void @coro.devirt.trigger(i8* null) ; CHECK-NEXT: call void @llvm.dbg.declare(metadata i32* %x.addr.reload.addr, metadata ![[RESUME_VAR:[0-9]+]] -; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]] -; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]] +; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]] +; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]] ; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink" diff --git a/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll b/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll index d8565aacb20..4a2da113fd0 100644 --- a/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll +++ b/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll @@ -32,13 +32,13 @@ invoke2: ; CHECK: pad.with.phi.from.invoke2: ; CHECK: %0 = cleanuppad within none [] -; CHECK: %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 6 +; CHECK: %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 3 ; CHECK: %y.reload = load i32, i32* %y.reload.addr ; CHECK: cleanupret from %0 unwind label %pad.with.phi ; CHECK: pad.with.phi.from.invoke1: ; CHECK: %1 = cleanuppad within none [] -; CHECK: %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %x.reload.addr = 
getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 2 ; CHECK: %x.reload = load i32, i32* %x.reload.addr ; CHECK: cleanupret from %1 unwind label %pad.with.phi diff --git a/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll b/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll index 8e9fada5c46..b5948e6419f 100644 --- a/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll +++ b/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll @@ -32,13 +32,13 @@ invoke2: ; CHECK: pad.with.phi.from.invoke2: ; CHECK: %0 = cleanuppad within none [] -; CHECK: %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 6 +; CHECK: %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 3 ; CHECK: %y.reload = load i32, i32* %y.reload.addr ; CHECK: cleanupret from %0 unwind label %pad.with.phi ; CHECK: pad.with.phi.from.invoke1: ; CHECK: %1 = cleanuppad within none [] -; CHECK: %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 2 ; CHECK: %x.reload = load i32, i32* %x.reload.addr ; CHECK: cleanupret from %1 unwind label %pad.with.phi diff --git a/test/Transforms/Coroutines/coro-frame-arrayalloca.ll b/test/Transforms/Coroutines/coro-frame-arrayalloca.ll index 56c1113f240..8de2ac8e078 100644 --- a/test/Transforms/Coroutines/coro-frame-arrayalloca.ll +++ b/test/Transforms/Coroutines/coro-frame-arrayalloca.ll @@ -36,13 +36,13 @@ suspend: } ; See if the array alloca was stored as an array field. 
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, double, [4 x i32], double } +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, double, double, [4 x i32], i1 } ; See if we used correct index to access prefix, data, suffix (@f) ; CHECK-LABEL: @f( -; CHECK: %prefix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 -; CHECK-NEXT: %data = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 -; CHECK-NEXT: %suffix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6 +; CHECK: %prefix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK-NEXT: %data = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK-NEXT: %suffix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 ; CHECK-NEXT: call void @consume.double.ptr(double* %prefix) ; CHECK-NEXT: call void @consume.i32.ptr(i32* %data) ; CHECK-NEXT: call void @consume.double.ptr(double* %suffix) @@ -50,9 +50,9 @@ suspend: ; See if we used correct index to access prefix, data, suffix (@f.resume) ; CHECK-LABEL: @f.resume( -; CHECK: %[[SUFFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6 -; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 -; CHECK: %[[PREFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %[[SUFFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %[[PREFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: call void @consume.double.ptr(double* %[[PREFIX]]) ; CHECK-NEXT: call void @consume.i32.ptr(i32* %[[DATA]]) ; CHECK-NEXT: call void @consume.double.ptr(double* %[[SUFFIX]]) diff --git a/test/Transforms/Coroutines/coro-frame.ll b/test/Transforms/Coroutines/coro-frame.ll index 
f19e9024fc4..3823d661922 100644 --- a/test/Transforms/Coroutines/coro-frame.ll +++ b/test/Transforms/Coroutines/coro-frame.ll @@ -35,17 +35,17 @@ pad: } ; See if the float was added to the frame -; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double } +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, double, i64, i1 } ; See if the float was spilled into the frame ; CHECK-LABEL: @f( ; CHECK: %r = call double @print( -; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store double %r, double* %r.spill.addr ; CHECK: ret i8* %hdl -; See of the float was loaded from the frame -; CHECK-LABEL: @f.resume( +; See if the float was loaded from the frame +; CHECK-LABEL: @f.resume(%f.Frame* noalias nonnull align 8 ; CHECK: %r.reload = load double, double* %r.reload.addr ; CHECK: call double @print(double %r.reload) ; CHECK: ret void diff --git a/test/Transforms/Coroutines/coro-heap-elide.ll b/test/Transforms/Coroutines/coro-heap-elide.ll index fdf5d109554..a5315e02c7c 100644 --- a/test/Transforms/Coroutines/coro-heap-elide.ll +++ b/test/Transforms/Coroutines/coro-heap-elide.ll @@ -57,7 +57,7 @@ if.end: ; CHECK-LABEL: @callResume( define void @callResume() { entry: -; CHECK: alloca %f.frame +; CHECK: alloca [4 x i8], align 4 ; CHECK-NOT: coro.begin ; CHECK-NOT: CustomAlloc ; CHECK: call void @may_throw() @@ -87,7 +87,7 @@ entry: ; CHECK-LABEL: @callResume_with_coro_suspend_1( define void @callResume_with_coro_suspend_1() { entry: -; CHECK: alloca %f.frame +; CHECK: alloca [4 x i8], align 4 ; CHECK-NOT: coro.begin ; CHECK-NOT: CustomAlloc ; CHECK: call void @may_throw() @@ -134,7 +134,7 @@ coro.ret: ; CHECK-LABEL: @callResume_with_coro_suspend_2( define void @callResume_with_coro_suspend_2() personality i8* null { entry: -; CHECK: alloca %f.frame +; CHECK: alloca [4 x i8], align 4 
; CHECK-NOT: coro.begin ; CHECK-NOT: CustomAlloc ; CHECK: call void @may_throw() @@ -199,7 +199,7 @@ coro.ret: ; CHECK-LABEL: @callResume_with_coro_suspend_3( define void @callResume_with_coro_suspend_3(i8 %cond) { entry: -; CHECK: alloca %f.frame +; CHECK: alloca [4 x i8], align 4 switch i8 %cond, label %coro.ret [ i8 0, label %init.suspend i8 1, label %coro.ret @@ -287,7 +287,7 @@ return: define void @callResume_PR34897_elision(i1 %cond) { ; CHECK-LABEL: entry: entry: -; CHECK: alloca %f.frame +; CHECK: alloca [4 x i8], align 4 ; CHECK: tail call void @bar( tail call void @bar(i8* null) br i1 %cond, label %if.then, label %if.else diff --git a/test/Transforms/Coroutines/coro-materialize.ll b/test/Transforms/Coroutines/coro-materialize.ll index 88076470f45..2a5a7182dca 100644 --- a/test/Transforms/Coroutines/coro-materialize.ll +++ b/test/Transforms/Coroutines/coro-materialize.ll @@ -34,7 +34,7 @@ suspend: } ; See that we only spilled one value -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 } +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i1 } ; CHECK-LABEL: @f( declare i8* @llvm.coro.free(token, i8*) diff --git a/test/Transforms/Coroutines/coro-padding.ll b/test/Transforms/Coroutines/coro-padding.ll index f79f9c98ca7..2dbc6cbce65 100644 --- a/test/Transforms/Coroutines/coro-padding.ll +++ b/test/Transforms/Coroutines/coro-padding.ll @@ -9,7 +9,7 @@ declare void @consume(%PackedStruct*) define i8* @f() "coroutine.presplit"="1" { entry: - %data = alloca %PackedStruct, align 8 + %data = alloca %PackedStruct, align 32 %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @malloc(i32 %size) @@ -32,17 +32,17 @@ suspend: } ; See if the padding was inserted before PackedStruct -; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, [6 x i8], %PackedStruct } +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void 
(%f.Frame*)*, i1, [15 x i8], %PackedStruct } -; See if we used correct index to access packed struct (padding is field 4) +; See if we used correct index to access packed struct (padding is field 3) ; CHECK-LABEL: @f( -; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 ; CHECK-NEXT: call void @consume(%PackedStruct* %[[DATA]]) ; CHECK: ret i8* -; See if we used correct index to access packed struct (padding is field 4) +; See if we used correct index to access packed struct (padding is field 3) ; CHECK-LABEL: @f.resume( -; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 ; CHECK-NEXT: call void @consume(%PackedStruct* %[[DATA]]) ; CHECK: ret void diff --git a/test/Transforms/Coroutines/coro-param-copy.ll b/test/Transforms/Coroutines/coro-param-copy.ll index 739c4cc7418..5967a05226f 100644 --- a/test/Transforms/Coroutines/coro-param-copy.ll +++ b/test/Transforms/Coroutines/coro-param-copy.ll @@ -33,7 +33,7 @@ suspend: } ; See that we added both x and y to the frame. -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, i64 } +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 } ; See that all of the uses prior to coro-begin stays put. ; CHECK-LABEL: define i8* @f() { @@ -46,10 +46,10 @@ suspend: ; See that we only copy the x as y was not modified prior to coro.begin. 
; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr -; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK-NEXT: %1 = load i64, i64* %x.addr ; CHECK-NEXT: store i64 %1, i64* %0 -; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 ; CHECK-NEXT: store i1 false, i1* %index.addr1 ; CHECK-NEXT: ret i8* %hdl diff --git a/test/Transforms/Coroutines/coro-retcon-alloca.ll b/test/Transforms/Coroutines/coro-retcon-alloca.ll index 17aec4eed1b..61c21324d94 100644 --- a/test/Transforms/Coroutines/coro-retcon-alloca.ll +++ b/test/Transforms/Coroutines/coro-retcon-alloca.ll @@ -28,21 +28,20 @@ cleanup: ; CHECK-LABEL: define { i8*, i8*, i32 } @f(i8* %buffer, i32 %n) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32* -; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4 -; CHECK-NEXT: [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n) ; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %buffer, i64 8 -; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8** +; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i32* +; CHECK-NEXT: store i32 %n, i32* [[T1]], align 4 +; CHECK-NEXT: [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n) +; CHECK-NEXT: [[T1:%.*]] = bitcast i8* %buffer to i8** ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T1]], align 8 ; CHECK-NEXT: [[T0:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[ALLOC]], 1 ; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i8*, i32 } [[T0]], i32 %n, 2 ; CHECK-NEXT: ret { i8*, i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i8*, i32 } 
@f.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) ; CHECK-NEXT: : -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %0, i64 8 -; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8** +; CHECK-NEXT: [[T1:%.*]] = bitcast i8* %0 to i8** ; CHECK-NEXT: [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8 ; CHECK-NEXT: tail call void @deallocate(i8* [[ALLOC]]) ; CHECK-NEXT: br i1 %1, @@ -83,14 +82,14 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to i32* -; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 8 ; CHECK-NEXT: %inc = add i32 [[T1]], 1 -; CHECK-NEXT: store i32 %inc, i32* [[T0]], align 4 +; CHECK-NEXT: store i32 %inc, i32* [[T0]], align 8 ; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64 ; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8 ; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]]) @@ -132,17 +131,17 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : ; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32* -; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 8 ; CHECK-NEXT: %inc = add i32 [[T1]], 1 ; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64 ; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8 ; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]]) -; CHECK-NEXT: store i32 %inc, i32* [[NSLOT]], align 4 +; 
CHECK-NEXT: store i32 %inc, i32* [[NSLOT]], align 8 ; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %inc, 1 ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK: : @@ -180,14 +179,14 @@ loop2: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull %0) +; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0) ; CHECK-NEXT: : ; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32* -; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 8 ; CHECK-NEXT: %inc = add i32 [[T1]], 1 ; CHECK-NEXT: br label %loop2 ; CHECK: : -; CHECK-NEXT: store i32 %k, i32* [[NSLOT]], align 4 +; CHECK-NEXT: store i32 %k, i32* [[NSLOT]], align 8 ; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %k, 1 ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK: loop2: diff --git a/test/Transforms/Coroutines/coro-retcon-once-value.ll b/test/Transforms/Coroutines/coro-retcon-once-value.ll index ac49b22ee6b..6e80da87bc0 100644 --- a/test/Transforms/Coroutines/coro-retcon-once-value.ll +++ b/test/Transforms/Coroutines/coro-retcon-once-value.ll @@ -45,7 +45,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[T1]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1) +; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -57,7 +57,7 @@ cleanup: ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.1(i8* noalias nonnull %0, i1 zeroext %1) +; CHECK-LABEL: define internal void @f.resume.1(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 
%1, ; CHECK: : diff --git a/test/Transforms/Coroutines/coro-retcon-once-value2.ll b/test/Transforms/Coroutines/coro-retcon-once-value2.ll index ad49f24dc54..4f43da03550 100644 --- a/test/Transforms/Coroutines/coro-retcon-once-value2.ll +++ b/test/Transforms/Coroutines/coro-retcon-once-value2.ll @@ -37,7 +37,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32* } [[T0]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1) +; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** ; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] diff --git a/test/Transforms/Coroutines/coro-retcon-resume-values.ll b/test/Transforms/Coroutines/coro-retcon-resume-values.ll index ac99dd15b98..80e8170d7ba 100644 --- a/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ b/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -30,7 +30,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32, i1)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i32 %1, i1 zeroext %2) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1, i1 zeroext %2) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to i32* ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4 diff --git a/test/Transforms/Coroutines/coro-retcon-resume-values2.ll b/test/Transforms/Coroutines/coro-retcon-resume-values2.ll index 43f98e958aa..e2412b6b8a5 100644 --- a/test/Transforms/Coroutines/coro-retcon-resume-values2.ll +++ b/test/Transforms/Coroutines/coro-retcon-resume-values2.ll @@ -29,7 +29,7 @@ entry: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i32 %1) +; CHECK-LABEL: define internal i8* 
@f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** ; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] @@ -45,7 +45,7 @@ entry: ; CHECK-NEXT: ret i8* [[CONT]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull %0, i32 %1) +; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** ; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] @@ -64,7 +64,7 @@ entry: ; CHECK-NEXT: ret i8* [[CONT]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull %0, i32 %1) +; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** ; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] diff --git a/test/Transforms/Coroutines/coro-retcon-value.ll b/test/Transforms/Coroutines/coro-retcon-value.ll index cfda73bbe75..29ec7cda170 100644 --- a/test/Transforms/Coroutines/coro-retcon-value.ll +++ b/test/Transforms/Coroutines/coro-retcon-value.ll @@ -30,7 +30,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i8 zeroext %1) +; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i8 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = icmp eq i8 %1, 0 ; CHECK-NEXT: br i1 [[T0]], diff --git a/test/Transforms/Coroutines/coro-retcon.ll b/test/Transforms/Coroutines/coro-retcon.ll index 5cd4cb61d94..13283f05b26 100644 --- a/test/Transforms/Coroutines/coro-retcon.ll +++ b/test/Transforms/Coroutines/coro-retcon.ll @@ -30,7 +30,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 
to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : diff --git a/test/Transforms/Coroutines/coro-spill-after-phi.ll b/test/Transforms/Coroutines/coro-spill-after-phi.ll index a22d8243433..1be173e5fc1 100644 --- a/test/Transforms/Coroutines/coro-spill-after-phi.ll +++ b/test/Transforms/Coroutines/coro-spill-after-phi.ll @@ -34,14 +34,14 @@ suspend: } ; Verifies that the both phis are stored correctly in the coroutine frame -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32, i32 } +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 } ; CHECK-LABEL: @f( ; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr ; CHECK: %phi1 = select i1 %n, i32 0, i32 2 ; CHECK: %phi2 = select i1 %n, i32 1, i32 3 -; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 ; CHECK: store i32 %phi2, i32* %phi2.spill.addr -; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i32 %phi1, i32* %phi1.spill.addr ; CHECK: ret i8* %hdl diff --git a/test/Transforms/Coroutines/coro-spill-corobegin.ll b/test/Transforms/Coroutines/coro-spill-corobegin.ll index 0caea1c90fb..8923e2880f2 100644 --- a/test/Transforms/Coroutines/coro-spill-corobegin.ll +++ b/test/Transforms/Coroutines/coro-spill-corobegin.ll @@ -38,18 +38,18 @@ suspend: } ; See if the i8* for coro.begin was added to f.Frame -; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i8* } +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void 
(%f.Frame*)*, i8*, i1 } ; See if the g's coro.begin was spilled into the frame ; CHECK-LABEL: @f( ; CHECK: %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*)) ; CHECK: %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null) -; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i8* %innerhdl, i8** %[[spilladdr]] ; See if the coro.begin was loaded from the frame ; CHECK-LABEL: @f.resume( -; CHECK: %[[innerhdlAddr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %{{.+}}, i32 0, i32 4 +; CHECK: %[[innerhdlAddr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %{{.+}}, i32 0, i32 2 ; CHECK: %[[innerhdl:.+]] = load i8*, i8** %[[innerhdlAddr]] ; CHECK: %[[gframe:.+]] = bitcast i8* %[[innerhdl]] to %g.Frame* ; CHECK: %[[gvarAddr:.+]] = getelementptr inbounds %g.Frame, %g.Frame* %[[gframe]], i32 0, i32 4 diff --git a/test/Transforms/Coroutines/coro-swifterror.ll b/test/Transforms/Coroutines/coro-swifterror.ll index 932e448a571..7390bb77ca9 100644 --- a/test/Transforms/Coroutines/coro-swifterror.ll +++ b/test/Transforms/Coroutines/coro-swifterror.ll @@ -40,7 +40,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1, i8**)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1, i8** swifterror %2) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1, i8** swifterror %2) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -102,7 +102,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1)* @g.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @g.resume.0(i8* noalias nonnull %0, i1 zeroext %1) +; CHECK-LABEL: define internal i8* @g.resume.0(i8* noalias nonnull align 4 
dereferenceable(8) %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[ERRORSLOT:%.*]] = alloca swifterror i8*, align 4 ; CHECK-NEXT: br i1 %1, diff --git a/test/Transforms/Coroutines/ex0.ll b/test/Transforms/Coroutines/ex0.ll index 514ae867ae0..de5752387cf 100644 --- a/test/Transforms/Coroutines/ex0.ll +++ b/test/Transforms/Coroutines/ex0.ll @@ -1,6 +1,6 @@ ; First example from Doc/Coroutines.rst (two block loop) -; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s -; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default<O2>' -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -O2 -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define i8* @f(i32 %n) { entry: diff --git a/test/Transforms/Coroutines/ex1.ll b/test/Transforms/Coroutines/ex1.ll index 161b59fb918..42f603865b6 100644 --- a/test/Transforms/Coroutines/ex1.ll +++ b/test/Transforms/Coroutines/ex1.ll @@ -1,6 +1,6 @@ ; First example from Doc/Coroutines.rst (one block loop) -; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s +; RUN: opt < %s -O2 -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define i8* @f(i32 %n) { entry: diff --git a/test/Transforms/Coroutines/ex5.ll b/test/Transforms/Coroutines/ex5.ll index 84728f56b04..dd566505ba8 100644 --- a/test/Transforms/Coroutines/ex5.ll +++ b/test/Transforms/Coroutines/ex5.ll @@ -1,6 +1,6 @@ ; Fifth example from Doc/Coroutines.rst (final suspend) -; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck
%s +; RUN: opt < %s -O2 -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define i8* @f(i32 %n) { entry: