Use optimal layout and preserve alloca alignment in coroutine frames.

Previously, we would ignore alloca alignment when building the frame and just use the natural alignment of the allocated type. If an alloca is over-aligned for its IR type, this could lead to a frame entry with inadequate alignment for the downstream uses of the alloca. Since highly-aligned fields also tend to produce poor layouts under a naive layout algorithm, I've also switched coroutine frames to use the new optimal struct layout algorithm. In order to communicate the frame size and alignment to later passes, I needed to set align+dereferenceable attributes on the frame-pointer parameter of the resume function. This is clearly the right thing to do, but the align attribute currently seems to result in assumptions being added during inlining that the optimizer cannot easily remove; for that reason, affected coroutine tests (e.g. ArgAddr.ll) now pass -preserve-alignment-assumptions-during-inlining=false.
2024-10-19 19:12:56 +02:00 · 2020-03-25 16:58:09 -04:00 · 2020-03-25 16:58:09 -04:00 · 51d7b85305
commit 51d7b85305
parent d6a743b7f8
33 changed files with 378 additions and 196 deletions
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@ -3084,7 +3084,9 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
    if (Attrs.hasParamAttribute(I, AK))
      Copy.addAttribute(AK);
  }
-  if (Attrs.hasParamAttribute(I, Attribute::Alignment))
+  // `align` is ABI-affecting only in combination with `byval`.
+  if (Attrs.hasParamAttribute(I, Attribute::Alignment) &&
+      Attrs.hasParamAttribute(I, Attribute::ByVal))
    Copy.addAlignmentAttr(Attrs.getParamAlignment(I));
  return Copy;
 }
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@ -61,14 +61,14 @@ void Lowerer::lowerResumeOrDestroy(CallSite CS,
 // TODO: Handle the case when coroutine promise alloca has align override.
 void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) {
  Value *Operand = Intrin->getArgOperand(0);
-  unsigned Alignement = Intrin->getAlignment();
+  Align Alignment = Intrin->getAlignment();
  Type *Int8Ty = Builder.getInt8Ty();

  auto *SampleStruct =
      StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty});
  const DataLayout &DL = TheModule.getDataLayout();
  int64_t Offset = alignTo(
-      DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignement);
+      DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment);
  if (Intrin->isFromPromise())
    Offset = -Offset;

--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@ -34,7 +34,8 @@ struct Lowerer : coro::LowererBase {

  Lowerer(Module &M) : LowererBase(M) {}

-  void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
+  void elideHeapAllocations(Function *F, uint64_t FrameSize,
+                            MaybeAlign FrameAlign, AAResults &AA);
  bool shouldElide(Function *F, DominatorTree &DT) const;
  void collectPostSplitCoroIds(Function *F);
  bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
@ -92,10 +93,23 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
      }
 }

-// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
-static Type *getFrameType(Function *Resume) {
-  auto *ArgType = Resume->arg_begin()->getType();
-  return cast<PointerType>(ArgType)->getElementType();
+// Given a resume function @f.resume(%f.frame* %frame), returns the size
+// and expected alignment of %f.frame as a (size, alignment) pair.
+static std::pair<uint64_t, MaybeAlign> getFrameLayout(Function *Resume) {
+  // Prefer the dereferenceable size and alignment recorded on parameter 0.
+  auto Size = Resume->getParamDereferenceableBytes(0);
+  auto Align = Resume->getParamAlign(0); // NOTE(review): shadows the Align type
+
+  // Fill in whichever is missing (size 0 or no align) from the pointee type.
+  if (Size == 0 || !Align) {
+    auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType();
+
+    const DataLayout &DL = Resume->getParent()->getDataLayout();
+    if (!Size) Size = DL.getTypeAllocSize(FrameTy);
+    if (!Align) Align = DL.getABITypeAlign(FrameTy);
+  }
+
+  return std::make_pair(Size, Align);
 }

 // Finds first non alloca instruction in the entry block of a function.
@ -108,8 +122,9 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {

 // To elide heap allocations we need to suppress code blocks guarded by
 // llvm.coro.alloc and llvm.coro.free instructions.
-void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
-  LLVMContext &C = FrameTy->getContext();
+void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
+                                   MaybeAlign FrameAlign, AAResults &AA) {
+  LLVMContext &C = F->getContext();
  auto *InsertPt =
      getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction());

@ -130,7 +145,9 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
  // here. Possibly we will need to do a mini SROA here and break the coroutine
  // frame into individual AllocaInst recreating the original alignment.
  const DataLayout &DL = F->getParent()->getDataLayout();
+  auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize);
  auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
+  Frame->setAlignment(FrameAlign);
  auto *FrameVoidPtr =
      new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);

@ -319,8 +336,9 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
    replaceWithConstant(DestroyAddrConstant, It.second);

  if (ShouldElide) {
-    auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
-    elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
+    auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant));
+    elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first,
+                         FrameSizeAndAlign.second, AA);
    coro::replaceCoroFree(CoroId, /*Elide=*/true);
  }

--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@ -29,6 +29,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/OptimalLayout.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
@ -340,52 +341,182 @@ namespace {
 // coroutine frame and if the alignment specified on the Alloca instruction
 // differs from the natural alignment of the alloca type we will need to insert
 // padding.
-struct PaddingCalculator {
+class FrameTypeBuilder {
+  struct Field {
+    uint64_t Size;
+    uint64_t Offset;
+    Spill *ForSpill;
+    Type *Ty;
+    unsigned FieldIndex;
+    Align Alignment;
+    Align TyAlignment;
+  };
+
  const DataLayout &DL;
  LLVMContext &Context;
-  unsigned StructSize = 0;
+  uint64_t StructSize = 0;
+  Align StructAlign;
+  bool IsFinished = false;

-  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
-      : DL(DL), Context(Context) {}
+  SmallVector<Field, 8> Fields;
+  DenseMap<Value*, unsigned> FieldIndexByKey;

-  // Replicate the logic from IR/DataLayout.cpp to match field offset
-  // computation for LLVM structs.
-  void addType(Type *Ty) {
-    unsigned TyAlign = DL.getABITypeAlignment(Ty);
-    if ((StructSize & (TyAlign - 1)) != 0)
-      StructSize = alignTo(StructSize, TyAlign);
+public:
+  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
+    : DL(DL), Context(Context) {}

-    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
+  class FieldId {
+    size_t Value;
+    explicit FieldId(size_t Value) : Value(Value) {}
+
+    friend class FrameTypeBuilder;
+  };
+
+  /// Add a field to this structure for the storage of an `alloca`
+  /// instruction.
+  FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
+                            bool IsHeader = false) {
+    Type *Ty = AI->getAllocatedType();
+
+    // Make an array type if this is a static array allocation.
+    if (AI->isArrayAllocation()) {
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
+      else
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
+    }
+
+    return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
  }

-  void addTypes(SmallVectorImpl<Type *> const &Types) {
-    for (auto *Ty : Types)
-      addType(Ty);
+  /// Add a field to this structure.
+  FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
+                   Spill *ForSpill = nullptr,
+                   bool IsHeader = false) {
+    assert(!IsFinished && "adding fields to a finished builder");
+    assert(Ty && "must provide a type for a field");
+
+    // The field size is always the alloc size of the type.
+    uint64_t FieldSize = DL.getTypeAllocSize(Ty);
+
+    // The field alignment might not be the type alignment, but we need
+    // to remember the type alignment anyway to build the type.
+    Align TyAlignment = DL.getABITypeAlign(Ty);
+    if (!FieldAlignment) FieldAlignment = TyAlignment;
+
+    // Lay out header fields immediately.
+    uint64_t Offset;
+    if (IsHeader) {
+      Offset = alignTo(StructSize, FieldAlignment);
+      StructSize = Offset + FieldSize;
+
+    // Everything else has a flexible offset.
+    } else {
+      Offset = OptimalLayoutField::FlexibleOffset;
+    }
+
+    Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
+                      *FieldAlignment, TyAlignment});
+    return FieldId(Fields.size() - 1);
  }

-  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
-    unsigned TyAlign = DL.getABITypeAlignment(Ty);
-    auto Natural = alignTo(StructSize, TyAlign);
-    auto Forced = alignTo(StructSize, ForcedAlignment);
+  /// Finish the layout and set the body on the given type.
+  void finish(StructType *Ty);

-    // Return how many bytes of padding we need to insert.
-    if (Natural != Forced)
-      return std::max(Natural, Forced) - StructSize;
-
-    // Rely on natural alignment.
-    return 0;
+  uint64_t getStructSize() const {
+    assert(IsFinished && "not yet finished!");
+    return StructSize;
  }

-  // If padding required, return the padding field type to insert.
-  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
-    if (auto Padding = computePadding(Ty, ForcedAlignment))
-      return ArrayType::get(Type::getInt8Ty(Context), Padding);
+  Align getStructAlign() const {
+    assert(IsFinished && "not yet finished!");
+    return StructAlign;
+  }

-    return nullptr;
+  unsigned getFieldIndex(FieldId Id) const {
+    assert(IsFinished && "not yet finished!");
+    return Fields[Id.Value].FieldIndex;
  }
 };
 } // namespace

+void FrameTypeBuilder::finish(StructType *Ty) {
+  assert(!IsFinished && "already finished!");
+
+  // Prepare the optimal-layout field array, one entry per added field.
+  // The Id in the layout field is a pointer to our Field for it.
+  SmallVector<OptimalLayoutField, 8> LayoutFields;
+  LayoutFields.reserve(Fields.size());
+  for (auto &Field : Fields) {
+    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
+                              Field.Offset);
+  }
+
+  // Perform layout; the result is the overall (size, alignment) pair.
+  auto SizeAndAlign = performOptimalLayout(LayoutFields);
+  StructSize = SizeAndAlign.first;
+  StructAlign = SizeAndAlign.second;
+
+  auto getField = [](const OptimalLayoutField &LayoutField) -> Field & {
+    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
+  };
+
+  // We need to produce a packed struct type if there's a field whose
+  // assigned offset isn't a multiple of its natural type alignment.
+  bool Packed = [&] {
+    for (auto &LayoutField : LayoutFields) {
+      auto &F = getField(LayoutField);
+      if (!isAligned(F.TyAlignment, LayoutField.Offset))
+        return true;
+    }
+    return false;
+  }();
+
+  // Build the struct body.
+  SmallVector<Type*, 16> FieldTypes;
+  FieldTypes.reserve(LayoutFields.size() * 3 / 2); // room for padding fields
+  uint64_t LastOffset = 0;
+  for (auto &LayoutField : LayoutFields) {
+    auto &F = getField(LayoutField);
+
+    auto Offset = LayoutField.Offset;
+
+    // Add a padding field if there's a padding gap and we're either
+    // building a packed struct or the padding gap is more than we'd
+    // get from aligning to the field type's natural alignment.
+    assert(Offset >= LastOffset); // expects ascending, non-overlapping offsets
+    if (Offset != LastOffset) {
+      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
+        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
+                                            Offset - LastOffset));
+    }
+
+    // Record the layout information into both the Field and the
+    // original Spill, if there is one.
+    F.Offset = Offset;
+    F.FieldIndex = FieldTypes.size();
+    if (F.ForSpill) {
+      F.ForSpill->setFieldIndex(F.FieldIndex);
+    }
+
+    FieldTypes.push_back(F.Ty);
+    LastOffset = Offset + F.Size; // one past the end of this field
+  }
+
+  Ty->setBody(FieldTypes, Packed);
+
+#ifndef NDEBUG
+  // Check that the IR layout matches the offsets we expect.
+  auto Layout = DL.getStructLayout(Ty);
+  for (auto &F : Fields) {
+    assert(Ty->getElementType(F.FieldIndex) == F.Ty);
+    assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
+  }
+#endif
+
+  IsFinished = true; // the getters assert on this flag
+}
+
 // Build a struct that will keep state for an active coroutine.
 //   struct f.frame {
 //     ResumeFnTy ResumeFnAddr;
@ -398,13 +529,17 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  SpillInfo &Spills) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
-  PaddingCalculator Padder(C, DL);
-  SmallString<32> Name(F.getName());
-  Name.append(".Frame");
-  StructType *FrameTy = StructType::create(C, Name);
-  SmallVector<Type *, 8> Types;
+  StructType *FrameTy = [&] {
+    SmallString<32> Name(F.getName());
+    Name.append(".Frame");
+    return StructType::create(C, Name);
+  }();
+
+  FrameTypeBuilder B(C, DL);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+  Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
+  Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FramePtrTy = FrameTy->getPointerTo();
@ -412,74 +547,74 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                   /*IsVarArg=*/false);
    auto *FnPtrTy = FnTy->getPointerTo();

-    // Figure out how wide should be an integer type storing the suspend index.
+    // Add header fields for the resume and destroy functions.
+    // We can rely on these being perfectly packed.
+    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+
+    // Add a header field for the promise if there is one.
+    if (PromiseAlloca) {
+      PromiseFieldId =
+        B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
+    }
+
+    // Add a field to store the suspend index.  This doesn't need to
+    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
-    Type *PromiseType = PromiseAlloca
-                            ? PromiseAlloca->getType()->getElementType()
-                            : Type::getInt1Ty(C);
    Type *IndexType = Type::getIntNTy(C, IndexBits);
-    Types.push_back(FnPtrTy);
-    Types.push_back(FnPtrTy);
-    Types.push_back(PromiseType);
-    Types.push_back(IndexType);
+
+    SwitchIndexFieldId = B.addField(IndexType, None);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  Value *CurrentDef = nullptr;

-  Padder.addTypes(Types);
-
  // Create an entry for every spilled value.
  for (auto &S : Spills) {
+    // We can have multiple entries in Spills for a single value, but
+    // they should form a contiguous run.  Ignore all but the first.
    if (CurrentDef == S.def())
      continue;

    CurrentDef = S.def();
-    // PromiseAlloca was already added to Types array earlier.
-    if (CurrentDef == PromiseAlloca)
-      continue;

-    uint64_t Count = 1;
-    Type *Ty = nullptr;
+    assert(CurrentDef != PromiseAlloca &&
+           "recorded spill use of promise alloca?");
+
    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
-      Ty = AI->getAllocatedType();
-      if (unsigned AllocaAlignment = AI->getAlignment()) {
-        // If alignment is specified in alloca, see if we need to insert extra
-        // padding.
-        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
-          Types.push_back(PaddingTy);
-          Padder.addType(PaddingTy);
-        }
-      }
-      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
-        Count = CI->getValue().getZExtValue();
-      else
-        report_fatal_error("Coroutines cannot handle non static allocas yet");
+      B.addFieldForAlloca(AI, &S);
    } else {
-      Ty = CurrentDef->getType();
+      Type *Ty = CurrentDef->getType();
+      B.addField(Ty, None, &S);
    }
-    S.setFieldIndex(Types.size());
-    if (Count == 1)
-      Types.push_back(Ty);
-    else
-      Types.push_back(ArrayType::get(Ty, Count));
-    Padder.addType(Ty);
  }
-  FrameTy->setBody(Types);
+
+  B.finish(FrameTy);
+  Shape.FrameAlign = B.getStructAlign();
+  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
+  // In the switch ABI, remember the field indices for the promise and
+  // switch-index fields.
  case coro::ABI::Switch:
+    Shape.SwitchLowering.IndexField =
+      B.getFieldIndex(*SwitchIndexFieldId);
+    Shape.SwitchLowering.PromiseField =
+      (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
+
+    // Also round the frame size up to a multiple of its alignment, as is
+    // generally expected in C/C++.
+    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;

-  // Remember whether the frame is inline in the storage.
+  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
-    auto &Layout = F.getParent()->getDataLayout();
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
-      = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
-         Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
+      = (B.getStructSize() <= Id->getStorageSize() &&
+         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  }
@ -608,10 +743,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
  // we remember allocas and their indices to be handled once we processed
  // all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
-  // Promise alloca (if present) has a fixed field number.
+
+  // Promise alloca (if present) doesn't show in the spills and has a
+  // special field number.
  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
    assert(Shape.ABI == coro::ABI::Switch);
-    Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
+    Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
  }

  // Create a GEP with the given index into the coroutine frame for the original
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@ -211,8 +211,8 @@ public:
    return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
  }

-  uint64_t getStorageAlignment() const {
-    return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+  Align getStorageAlignment() const {
+    return Align(cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue());
  }

  Value *getStorage() const {
@ -338,11 +338,16 @@ class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst {
  enum { FrameArg, AlignArg, FromArg };

 public:
+  /// Are we translating from the frame to the promise (false) or from
+  /// the promise to the frame (true)?
  bool isFromPromise() const {
    return cast<Constant>(getArgOperand(FromArg))->isOneValue();
  }
-  unsigned getAlignment() const {
-    return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+
+  /// The required alignment of the promise.  This must match the
+  /// alignment of the promise alloca in the coroutine.
+  Align getAlignment() const {
+    return Align(cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue());
  }

  // Methods to support type inquiry through isa, cast, and dyn_cast:
--- a/lib/Transforms/Coroutines/CoroInternal.h
+++ b/lib/Transforms/Coroutines/CoroInternal.h
@ -96,17 +96,22 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
  struct SwitchFieldIndex {
    enum {
      Resume,
-      Destroy,
-      Promise,
-      Index,
-      /// The index of the first spill field.
-      FirstSpill
+      Destroy
+
+      // The promise field is always at a fixed offset from the start of
+      // the frame given its type, but its field index isn't a constant
+      // for all possible frames.
+
+      // The switch-index field isn't at a fixed offset or index, either;
+      // we just work it in where it fits best.
    };
  };

  coro::ABI ABI;

  StructType *FrameTy;
+  Align FrameAlign;
+  uint64_t FrameSize;
  Instruction *FramePtr;
  BasicBlock *AllocaSpillBlock;

@ -114,6 +119,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
    SwitchInst *ResumeSwitch;
    AllocaInst *PromiseAlloca;
    BasicBlock *ResumeEntryBlock;
+    unsigned IndexField;
+    unsigned PromiseField;
    bool HasFinalSuspend;
  };

@ -141,10 +148,15 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
    return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
  }

+  unsigned getSwitchIndexField() const {
+    assert(ABI == coro::ABI::Switch);
+    assert(FrameTy && "frame type not assigned");
+    return SwitchLowering.IndexField;
+  }
  IntegerType *getIndexType() const {
    assert(ABI == coro::ABI::Switch);
    assert(FrameTy && "frame type not assigned");
-    return cast<IntegerType>(FrameTy->getElementType(SwitchFieldIndex::Index));
+    return cast<IntegerType>(FrameTy->getElementType(getSwitchIndexField()));
  }
  ConstantInt *getIndex(uint64_t Value) const {
    return ConstantInt::get(getIndexType(), Value);
@ -203,23 +215,17 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
    llvm_unreachable("Unknown coro::ABI enum");
  }

-  unsigned getFirstSpillFieldIndex() const {
-    switch (ABI) {
-    case coro::ABI::Switch:
-      return SwitchFieldIndex::FirstSpill;
-
-    case coro::ABI::Retcon:
-    case coro::ABI::RetconOnce:
-      return 0;
-    }
-    llvm_unreachable("Unknown coro::ABI enum");
-  }
-
  AllocaInst *getPromiseAlloca() const {
    if (ABI == coro::ABI::Switch)
      return SwitchLowering.PromiseAlloca;
    return nullptr;
  }
+  unsigned getPromiseField() const {
+    assert(ABI == coro::ABI::Switch);
+    assert(FrameTy && "frame type not assigned");
+    assert(SwitchLowering.PromiseAlloca && "no promise alloca");
+    return SwitchLowering.PromiseField;
+  }

  /// Allocate memory according to the rules of the active lowering.
  ///
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@ -285,7 +285,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
  auto *FramePtr = Shape.FramePtr;
  auto *FrameTy = Shape.FrameTy;
  auto *GepIndex = Builder.CreateStructGEP(
-      FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+      FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
  auto *Switch =
      Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
@ -311,7 +311,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
      Builder.CreateStore(NullPtr, GepIndex);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
-          FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+          FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
      Builder.CreateStore(IndexVal, GepIndex);
    }
    Save->replaceAllUsesWith(ConstantTokenNone::get(C));
@ -645,6 +645,17 @@ Value *CoroCloner::deriveNewFramePointer() {
  llvm_unreachable("bad ABI");
 }

+static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
+                                 unsigned ParamIndex, // parameter to annotate
+                                 uint64_t Size, Align Alignment) {
+  AttrBuilder ParamAttrs; // nonnull, noalias, align, dereferenceable(Size)
+  ParamAttrs.addAttribute(Attribute::NonNull);
+  ParamAttrs.addAttribute(Attribute::NoAlias);
+  ParamAttrs.addAlignmentAttr(Alignment);
+  ParamAttrs.addDereferenceableAttr(Size);
+  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
 /// Clone the body of the original function into a resume function of
 /// some sort.
 void CoroCloner::create() {
@ -693,6 +704,9 @@ void CoroCloner::create() {
    // original function.  This should include optimization settings and so on.
    NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
                                      OrigAttrs.getFnAttributes());
+
+    addFramePointerAttrs(NewAttrs, Context, 0,
+                         Shape.FrameSize, Shape.FrameAlign);
    break;

  case coro::ABI::Retcon:
@ -700,13 +714,13 @@ void CoroCloner::create() {
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
+
+    addFramePointerAttrs(NewAttrs, Context, 0,
+                         Shape.getRetconCoroId()->getStorageSize(),
+                         Shape.getRetconCoroId()->getStorageAlignment());
    break;
  }

-  // Make the frame parameter nonnull and noalias.
-  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
-  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
-
  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless.  Note that for unique
@ -1002,8 +1016,8 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
-      // FIXME: Need to handle overaligned members.
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
+      Frame->setAlignment(Shape.FrameAlign);
      auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
@ -1237,6 +1251,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,

    // Allocate.  We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
+    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
      Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
--- a/test/Transforms/Coroutines/ArgAddr.ll
+++ b/test/Transforms/Coroutines/ArgAddr.ll
@ -1,7 +1,7 @@
 ; Need to move users of allocas that were moved into the coroutine frame after
 ; coro.begin.
-; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s
+; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -O2 -enable-coroutines -S | FileCheck %s
+; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false  -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s

 define nonnull i8* @f(i32 %n) {
 entry:
--- a/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
+++ b/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
@ -30,11 +30,11 @@ suspend:
 }

 ; See if %this was added to the frame
-; CHECK: %f_copy.Frame = type { void (%f_copy.Frame*)*, void (%f_copy.Frame*)*, i1, i1, i64 }
+; CHECK: %f_copy.Frame = type { void (%f_copy.Frame*)*, void (%f_copy.Frame*)*, i64, i1 }

 ; See that %this is spilled into the frame
 ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg)
-; CHECK:  %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 4
+; CHECK:  %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:  store i64 %this_arg, i64* %this.spill.addr
 ; CHECK: ret i8* %hdl

--- a/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
+++ b/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
@ -27,11 +27,11 @@ suspend:
 }

 ; See if %this was added to the frame
-; CHECK: %f_direct.Frame = type { void (%f_direct.Frame*)*, void (%f_direct.Frame*)*, i1, i1, i64 }
+; CHECK: %f_direct.Frame = type { void (%f_direct.Frame*)*, void (%f_direct.Frame*)*, i64, i1 }

 ; See that %this is spilled into the frame
 ; CHECK-LABEL: define i8* @f_direct(i64 %this)
-; CHECK: %this.spill.addr = getelementptr inbounds %f_direct.Frame, %f_direct.Frame* %FramePtr, i32 0, i32 4
+; CHECK: %this.spill.addr = getelementptr inbounds %f_direct.Frame, %f_direct.Frame* %FramePtr, i32 0, i32 2
 ; CHECK: store i64 %this, i64* %this.spill.addr
 ; CHECK: ret i8* %hdl

--- a/test/Transforms/Coroutines/coro-catchswitch.ll
+++ b/test/Transforms/Coroutines/coro-catchswitch.ll
@ -32,7 +32,7 @@ catch.dispatch:                                   ; preds = %if.else, %if.then
 ; CHECK: catch.dispatch:
 ; CHECK:  %val = phi i32 [ 2, %if.else ], [ 1, %if.then ]
 ; CHECK:  %[[Pad:.+]] = cleanuppad within none []
-; CHECK:  %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK:  %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:  store i32 %val, i32* %val.spill.addr
 ; CHECK:  cleanupret from %[[Pad]] unwind label %[[Switch:.+]]

--- a/test/Transforms/Coroutines/coro-debug.ll
+++ b/test/Transforms/Coroutines/coro-debug.ll
@ -128,12 +128,12 @@ attributes #7 = { noduplicate }
 !24 = !DILocation(line: 62, column: 3, scope: !6)

 ; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]]
-; CHECK: define internal fastcc void @f.resume(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[RESUME:[0-9]+]]
+; CHECK: define internal fastcc void @f.resume(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[RESUME:[0-9]+]]
 ; CHECK: entry.resume:
 ; CHECK-NEXT: call void @coro.devirt.trigger(i8* null)
 ; CHECK-NEXT: call void @llvm.dbg.declare(metadata i32* %x.addr.reload.addr, metadata ![[RESUME_VAR:[0-9]+]]
-; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]]
-; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]]
+; CHECK: define internal fastcc void @f.destroy(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]]
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* noalias nonnull align 8 dereferenceable(32) %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]]

 ; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink"

--- a/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
+++ b/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
@ -32,13 +32,13 @@ invoke2:

 ; CHECK: pad.with.phi.from.invoke2:
 ; CHECK:   %0 = cleanuppad within none []
-; CHECK:   %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 3
 ; CHECK:   %y.reload = load i32, i32* %y.reload.addr
 ; CHECK:   cleanupret from %0 unwind label %pad.with.phi

 ; CHECK: pad.with.phi.from.invoke1:
 ; CHECK:   %1 = cleanuppad within none []
-; CHECK:   %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:   %x.reload = load i32, i32* %x.reload.addr
 ; CHECK:   cleanupret from %1 unwind label %pad.with.phi

--- a/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
+++ b/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
@ -32,13 +32,13 @@ invoke2:

 ; CHECK: pad.with.phi.from.invoke2:
 ; CHECK:   %0 = cleanuppad within none []
-; CHECK:   %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 3
 ; CHECK:   %y.reload = load i32, i32* %y.reload.addr
 ; CHECK:   cleanupret from %0 unwind label %pad.with.phi

 ; CHECK: pad.with.phi.from.invoke1:
 ; CHECK:   %1 = cleanuppad within none []
-; CHECK:   %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:   %x.reload = load i32, i32* %x.reload.addr
 ; CHECK:   cleanupret from %1 unwind label %pad.with.phi

--- a/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
+++ b/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
@ -36,13 +36,13 @@ suspend:
 }

 ; See if the array alloca was stored as an array field.
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, double, [4 x i32], double }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, double, double, [4 x i32], i1 }

 ; See if we used correct index to access prefix, data, suffix (@f)
 ; CHECK-LABEL: @f(
-; CHECK:       %prefix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
-; CHECK-NEXT:  %data = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
-; CHECK-NEXT:  %suffix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6
+; CHECK:       %prefix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
+; CHECK-NEXT:  %data = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK-NEXT:  %suffix = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
 ; CHECK-NEXT:  call void @consume.double.ptr(double* %prefix)
 ; CHECK-NEXT:  call void @consume.i32.ptr(i32* %data)
 ; CHECK-NEXT:  call void @consume.double.ptr(double* %suffix)
@ -50,9 +50,9 @@ suspend:

 ; See if we used correct index to access prefix, data, suffix (@f.resume)
 ; CHECK-LABEL: @f.resume(
-; CHECK:       %[[SUFFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6
-; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
-; CHECK:       %[[PREFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK:       %[[SUFFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK:       %[[PREFIX:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:       call void @consume.double.ptr(double* %[[PREFIX]])
 ; CHECK-NEXT:  call void @consume.i32.ptr(i32* %[[DATA]])
 ; CHECK-NEXT:  call void @consume.double.ptr(double* %[[SUFFIX]])
--- a/test/Transforms/Coroutines/coro-frame.ll
+++ b/test/Transforms/Coroutines/coro-frame.ll
@ -35,17 +35,17 @@ pad:
 }

 ; See if the float was added to the frame
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, double, i64, i1 }

 ; See if the float was spilled into the frame
 ; CHECK-LABEL: @f(
 ; CHECK: %r = call double @print(
-; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK: store double %r, double* %r.spill.addr
 ; CHECK: ret i8* %hdl

-; See of the float was loaded from the frame
-; CHECK-LABEL: @f.resume(
+; See if the float was loaded from the frame
+; CHECK-LABEL: @f.resume(%f.Frame* noalias nonnull align 8
 ; CHECK: %r.reload = load double, double* %r.reload.addr
 ; CHECK: call double @print(double %r.reload)
 ; CHECK: ret void
--- a/test/Transforms/Coroutines/coro-heap-elide.ll
+++ b/test/Transforms/Coroutines/coro-heap-elide.ll
@ -57,7 +57,7 @@ if.end:
 ; CHECK-LABEL: @callResume(
 define void @callResume() {
 entry:
-; CHECK: alloca %f.frame
+; CHECK: alloca [4 x i8], align 4
 ; CHECK-NOT: coro.begin
 ; CHECK-NOT: CustomAlloc
 ; CHECK: call void @may_throw()
@ -87,7 +87,7 @@ entry:
 ; CHECK-LABEL: @callResume_with_coro_suspend_1(
 define void @callResume_with_coro_suspend_1() {
 entry:
-; CHECK: alloca %f.frame
+; CHECK: alloca [4 x i8], align 4
 ; CHECK-NOT: coro.begin
 ; CHECK-NOT: CustomAlloc
 ; CHECK: call void @may_throw()
@ -134,7 +134,7 @@ coro.ret:
 ; CHECK-LABEL: @callResume_with_coro_suspend_2(
 define void @callResume_with_coro_suspend_2() personality i8* null {
 entry:
-; CHECK: alloca %f.frame
+; CHECK: alloca [4 x i8], align 4
 ; CHECK-NOT: coro.begin
 ; CHECK-NOT: CustomAlloc
 ; CHECK: call void @may_throw()
@ -199,7 +199,7 @@ coro.ret:
 ; CHECK-LABEL: @callResume_with_coro_suspend_3(
 define void @callResume_with_coro_suspend_3(i8 %cond) {
 entry:
-; CHECK: alloca %f.frame
+; CHECK: alloca [4 x i8], align 4
  switch i8 %cond, label  %coro.ret [
    i8 0, label %init.suspend
    i8 1, label %coro.ret
@ -287,7 +287,7 @@ return:
 define void @callResume_PR34897_elision(i1 %cond) {
 ; CHECK-LABEL: entry:
 entry:
-; CHECK: alloca %f.frame
+; CHECK: alloca [4 x i8], align 4
 ; CHECK: tail call void @bar(
  tail call void @bar(i8* null)
  br i1 %cond, label %if.then, label %if.else
--- a/test/Transforms/Coroutines/coro-materialize.ll
+++ b/test/Transforms/Coroutines/coro-materialize.ll
@ -34,7 +34,7 @@ suspend:
 }

 ; See that we only spilled one value
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 }
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i1 }
 ; CHECK-LABEL: @f(

 declare i8* @llvm.coro.free(token, i8*)
--- a/test/Transforms/Coroutines/coro-padding.ll
+++ b/test/Transforms/Coroutines/coro-padding.ll
@ -9,7 +9,7 @@ declare void @consume(%PackedStruct*)

 define i8* @f() "coroutine.presplit"="1" {
 entry:
-  %data = alloca %PackedStruct, align 8
+  %data = alloca %PackedStruct, align 32
  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
  %size = call i32 @llvm.coro.size.i32()
  %alloc = call i8* @malloc(i32 %size)
@ -32,17 +32,17 @@ suspend:
 }

 ; See if the padding was inserted before PackedStruct
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, [6 x i8], %PackedStruct }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], %PackedStruct }

-; See if we used correct index to access packed struct (padding is field 4)
+; See if we used correct index to access packed struct (padding is field 3)
 ; CHECK-LABEL: @f(
-; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
 ; CHECK-NEXT:  call void @consume(%PackedStruct* %[[DATA]])
 ; CHECK: ret i8*

-; See if we used correct index to access packed struct (padding is field 4)
+; See if we used correct index to access packed struct (padding is field 3)
 ; CHECK-LABEL: @f.resume(
-; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
 ; CHECK-NEXT:  call void @consume(%PackedStruct* %[[DATA]])
 ; CHECK: ret void

--- a/test/Transforms/Coroutines/coro-param-copy.ll
+++ b/test/Transforms/Coroutines/coro-param-copy.ll
@ -33,7 +33,7 @@ suspend:
 }

 ; See that we added both x and y to the frame.
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, i64 }
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }

 ; See that all of the uses prior to coro-begin stays put.
 ; CHECK-LABEL: define i8* @f() {
@ -46,10 +46,10 @@ suspend:

 ; See that we only copy the x as y was not modified prior to coro.begin.
 ; CHECK:  store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
-; CHECK-NEXT:  %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK-NEXT:  %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK-NEXT:  %1 = load i64, i64* %x.addr
 ; CHECK-NEXT:  store i64 %1, i64* %0
-; CHECK-NEXT:  %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK-NEXT:  %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
 ; CHECK-NEXT:  store i1 false, i1* %index.addr1
 ; CHECK-NEXT:  ret i8* %hdl

--- a/test/Transforms/Coroutines/coro-retcon-alloca.ll
+++ b/test/Transforms/Coroutines/coro-retcon-alloca.ll
@ -28,21 +28,20 @@ cleanup:

 ; CHECK-LABEL: define { i8*, i8*, i32 } @f(i8* %buffer, i32 %n)
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %buffer to i32*
-; CHECK-NEXT:    store i32 %n, i32* [[T0]], align 4
-; CHECK-NEXT:    [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n)
 ; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, i8* %buffer, i64 8
-; CHECK-NEXT:    [[T1:%.*]] = bitcast i8* [[T0]] to i8**
+; CHECK-NEXT:    [[T1:%.*]] = bitcast i8* [[T0]] to i32*
+; CHECK-NEXT:    store i32 %n, i32* [[T1]], align 4
+; CHECK-NEXT:    [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n)
+; CHECK-NEXT:    [[T1:%.*]] = bitcast i8* %buffer to i8**
 ; CHECK-NEXT:    store i8* [[ALLOC]], i8** [[T1]], align 8
 ; CHECK-NEXT:    [[T0:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[ALLOC]], 1
 ; CHECK-NEXT:    [[RET:%.*]] = insertvalue { i8*, i8*, i32 } [[T0]], i32 %n, 2
 ; CHECK-NEXT:    ret { i8*, i8*, i32 } [[RET]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1)
 ; CHECK-NEXT:  :
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, i8* %0, i64 8
-; CHECK-NEXT:    [[T1:%.*]] = bitcast i8* [[T0]] to i8**
+; CHECK-NEXT:    [[T1:%.*]] = bitcast i8* %0 to i8**
 ; CHECK-NEXT:    [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8
 ; CHECK-NEXT:    tail call void @deallocate(i8* [[ALLOC]])
 ; CHECK-NEXT:    br i1 %1,
@ -83,14 +82,14 @@ cleanup:
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to i32*
-; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[T0]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[T0]], align 8
 ; CHECK-NEXT:    %inc = add i32 [[T1]], 1
-; CHECK-NEXT:    store i32 %inc, i32* [[T0]], align 4
+; CHECK-NEXT:    store i32 %inc, i32* [[T0]], align 8
 ; CHECK-NEXT:    [[T0:%.*]] = zext i32 %inc to i64
 ; CHECK-NEXT:    [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
 ; CHECK-NEXT:    call void @use(i8* nonnull [[ALLOC]])
@ -132,17 +131,17 @@ cleanup:
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
 ; CHECK-NEXT:    [[NSLOT:%.*]] = bitcast i8* %0 to i32*
-; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[NSLOT]], align 8
 ; CHECK-NEXT:    %inc = add i32 [[T1]], 1
 ; CHECK-NEXT:    [[T0:%.*]] = zext i32 %inc to i64
 ; CHECK-NEXT:    [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
 ; CHECK-NEXT:    call void @use(i8* nonnull [[ALLOC]])
-; CHECK-NEXT:    store i32 %inc, i32* [[NSLOT]], align 4
+; CHECK-NEXT:    store i32 %inc, i32* [[NSLOT]], align 8
 ; CHECK-NEXT:    [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %inc, 1
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK:       :
@ -180,14 +179,14 @@ loop2:
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull %0)
+; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[NSLOT:%.*]] = bitcast i8* %0 to i32*
-; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[NSLOT]], align 8
 ; CHECK-NEXT:    %inc = add i32 [[T1]], 1
 ; CHECK-NEXT:    br label %loop2
 ; CHECK:       :
-; CHECK-NEXT:    store i32 %k, i32* [[NSLOT]], align 4
+; CHECK-NEXT:    store i32 %k, i32* [[NSLOT]], align 8
 ; CHECK-NEXT:    [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %k, 1
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK:       loop2:
--- a/test/Transforms/Coroutines/coro-retcon-once-value.ll
+++ b/test/Transforms/Coroutines/coro-retcon-once-value.ll
@ -45,7 +45,7 @@ cleanup:
 ; CHECK-NEXT:    ret { i8*, i32 } [[T1]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1)
+; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
@ -57,7 +57,7 @@ cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal void @f.resume.1(i8* noalias nonnull %0, i1 zeroext %1)
+; CHECK-LABEL: define internal void @f.resume.1(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
--- a/test/Transforms/Coroutines/coro-retcon-once-value2.ll
+++ b/test/Transforms/Coroutines/coro-retcon-once-value2.ll
@ -37,7 +37,7 @@ cleanup:
 ; CHECK-NEXT:    ret { i8*, i32* } [[T0]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1)
+; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
 ; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
--- a/test/Transforms/Coroutines/coro-retcon-resume-values.ll
+++ b/test/Transforms/Coroutines/coro-retcon-resume-values.ll
@ -30,7 +30,7 @@ cleanup:
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32, i1)* @f.resume.0 to i8*)
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i32 %1, i1 zeroext %2)
+; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1, i1 zeroext %2)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to i32*
 ; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[T0]], align 4
--- a/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
+++ b/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
@ -29,7 +29,7 @@ entry:
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*)
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i32 %1)
+; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
 ; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
@ -45,7 +45,7 @@ entry:
 ; CHECK-NEXT:    ret i8* [[CONT]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull %0, i32 %1)
+; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
 ; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
@ -64,7 +64,7 @@ entry:
 ; CHECK-NEXT:    ret i8* [[CONT]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull %0, i32 %1)
+; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
 ; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
--- a/test/Transforms/Coroutines/coro-retcon-value.ll
+++ b/test/Transforms/Coroutines/coro-retcon-value.ll
@ -30,7 +30,7 @@ cleanup:
 ; CHECK-NEXT:    ret { i8*, i32 } [[RET]]
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i8 zeroext %1)
+; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i8 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[T0:%.*]] = icmp eq i8 %1, 0
 ; CHECK-NEXT:    br i1 [[T0]],
--- a/test/Transforms/Coroutines/coro-retcon.ll
+++ b/test/Transforms/Coroutines/coro-retcon.ll
@ -30,7 +30,7 @@ cleanup:
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*)
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1)
+; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
--- a/test/Transforms/Coroutines/coro-spill-after-phi.ll
+++ b/test/Transforms/Coroutines/coro-spill-after-phi.ll
@ -34,14 +34,14 @@ suspend:
 }

 ; Verifies that the both phis are stored correctly in the coroutine frame
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32, i32 }
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 }
 ; CHECK-LABEL: @f(
 ; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
 ; CHECK: %phi1 = select i1 %n, i32 0, i32 2
 ; CHECK: %phi2 = select i1 %n, i32 1, i32 3
-; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
 ; CHECK: store i32 %phi2, i32* %phi2.spill.addr
-; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK: store i32 %phi1, i32* %phi1.spill.addr
 ; CHECK: ret i8* %hdl

--- a/test/Transforms/Coroutines/coro-spill-corobegin.ll
+++ b/test/Transforms/Coroutines/coro-spill-corobegin.ll
@ -38,18 +38,18 @@ suspend:
 }

 ; See if the i8* for coro.begin was added to f.Frame
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i8* }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i8*, i1 }

 ; See if the g's coro.begin was spilled into the frame
 ; CHECK-LABEL: @f(
 ; CHECK: %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*))
 ; CHECK: %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null)
-; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK: store i8* %innerhdl, i8** %[[spilladdr]]

 ; See if the coro.begin was loaded from the frame
 ; CHECK-LABEL: @f.resume(
-; CHECK: %[[innerhdlAddr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %{{.+}}, i32 0, i32 4
+; CHECK: %[[innerhdlAddr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %{{.+}}, i32 0, i32 2
 ; CHECK: %[[innerhdl:.+]] = load i8*, i8** %[[innerhdlAddr]]
 ; CHECK: %[[gframe:.+]] = bitcast i8* %[[innerhdl]] to %g.Frame*
 ; CHECK: %[[gvarAddr:.+]] = getelementptr inbounds %g.Frame, %g.Frame* %[[gframe]], i32 0, i32 4
--- a/test/Transforms/Coroutines/coro-swifterror.ll
+++ b/test/Transforms/Coroutines/coro-swifterror.ll
@ -40,7 +40,7 @@ cleanup:
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i1, i8**)* @f.resume.0 to i8*)
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull %0, i1 zeroext %1, i8** swifterror %2)
+; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1, i8** swifterror %2)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    br i1 %1,
 ; CHECK:       :
@ -102,7 +102,7 @@ cleanup:
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i1)* @g.resume.0 to i8*)
 ; CHECK-NEXT:  }

-; CHECK-LABEL: define internal i8* @g.resume.0(i8* noalias nonnull %0, i1 zeroext %1)
+; CHECK-LABEL: define internal i8* @g.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1)
 ; CHECK-NEXT:  :
 ; CHECK-NEXT:    [[ERRORSLOT:%.*]] = alloca swifterror i8*, align 4
 ; CHECK-NEXT:    br i1 %1,
--- a/test/Transforms/Coroutines/ex0.ll
+++ b/test/Transforms/Coroutines/ex0.ll
@ -1,6 +1,6 @@
 ; First example from Doc/Coroutines.rst (two block loop)
-; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s
-; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default<O2>' -S | FileCheck %s
+; RUN: opt < %s -enable-coroutines -O2 -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s
+; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

 define i8* @f(i32 %n) {
 entry:
--- a/test/Transforms/Coroutines/ex1.ll
+++ b/test/Transforms/Coroutines/ex1.ll
@ -1,6 +1,6 @@
 ; First example from Doc/Coroutines.rst (one block loop)
-; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s
+; RUN: opt < %s -O2 -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

 define i8* @f(i32 %n) {
 entry:
--- a/test/Transforms/Coroutines/ex5.ll
+++ b/test/Transforms/Coroutines/ex5.ll
@ -1,6 +1,6 @@
 ; Fifth example from Doc/Coroutines.rst (final suspend)
-; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s
+; RUN: opt < %s -O2 -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

 define i8* @f(i32 %n) {
 entry: