diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 98142d196c5..3ad1d92e8c2 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -985,6 +985,10 @@
 def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
 def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>;
 def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
                                         [IntrNoMem]>;
+def int_coro_alloca_alloc : Intrinsic<[llvm_token_ty],
+                                      [llvm_anyint_ty, llvm_i32_ty], []>;
+def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>;
+def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>;
 def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty],
                                [IntrNoMem, ReadNone<0>, ReadNone<1>]>;
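For orientation, the intended usage pattern of the three new intrinsics, as a minimal sketch distilled from the test added at the end of this patch (%size is a placeholder): coro.alloca.alloc takes a size of any integer width (reflected in the .i32 suffix of the call below) plus a constant i32 alignment and returns a token; coro.alloca.get projects the allocated pointer out of that token, and coro.alloca.free releases the allocation.

    %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %size, i32 8)
    %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
    ; ... use %ptr, possibly across a suspend point ...
    call void @llvm.coro.alloca.free(token %alloca)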
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 9e2a2bfda4a..0d0ca511ca2 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -960,6 +960,155 @@ static void splitAround(Instruction *I, const Twine &Name) {
   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
 }
 
+static bool isSuspendBlock(BasicBlock *BB) {
+  return isa<AnyCoroSuspendInst>(BB->front());
+}
+
+typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
+
+/// Does control flow starting at the given block ever reach a suspend
+/// instruction before reaching a block in VisitedOrFreeBBs?
+static bool isSuspendReachableFrom(BasicBlock *From,
+                                   VisitedBlocksSet &VisitedOrFreeBBs) {
+  // Eagerly try to add this block to the visited set. If it's already
+  // there, stop recursing; this path doesn't reach a suspend before
+  // either looping or reaching a freeing block.
+  if (!VisitedOrFreeBBs.insert(From).second)
+    return false;
+
+  // We assume that we'll already have split suspends into their own blocks.
+  if (isSuspendBlock(From))
+    return true;
+
+  // Recurse on the successors.
+  for (auto Succ : successors(From)) {
+    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
+      return true;
+  }
+
+  return false;
+}
+
+/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
+/// suspend point?
+static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
+  // Seed the visited set with all the basic blocks containing a free
+  // so that we won't pass them up.
+  VisitedBlocksSet VisitedOrFreeBBs;
+  for (auto User : AI->users()) {
+    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
+      VisitedOrFreeBBs.insert(FI->getParent());
+  }
+
+  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
+}
+
+/// After we split the coroutine, will the given basic block be along
+/// an obvious exit path for the resumption function?
+static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
+                                              unsigned depth = 3) {
+  // If we've bottomed out our depth count, stop searching and assume
+  // that the path might loop back.
+  if (depth == 0) return false;
+
+  // If this is a suspend block, we're about to exit the resumption function.
+  if (isSuspendBlock(BB)) return true;
+
+  // Recurse into the successors.
+  for (auto Succ : successors(BB)) {
+    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
+      return false;
+  }
+
+  // If none of the successors leads back in a loop, we're on an exit/abort.
+  return true;
+}
+
+static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
+  // Look for a free that isn't sufficiently obviously followed by
+  // either a suspend or a termination, i.e. something that will leave
+  // the coro resumption frame.
+  for (auto U : AI->users()) {
+    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
+    if (!FI) continue;
+
+    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
+      return true;
+  }
+
+  // If we never found one, we don't need a stack save.
+  return false;
+}
+
+/// Turn each of the given local allocas into a normal (dynamic) alloca
+/// instruction.
+static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst *> LocalAllocas) {
+  for (auto AI : LocalAllocas) {
+    auto M = AI->getModule();
+    IRBuilder<> Builder(AI);
+
+    // Save the stack depth. Try to avoid doing this if the stackrestore
+    // is going to immediately precede a return or something.
+    Value *StackSave = nullptr;
+    if (localAllocaNeedsStackSave(AI))
+      StackSave = Builder.CreateCall(
+          Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+    // Allocate memory.
+    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
+    Alloca->setAlignment(AI->getAlignment());
+
+    for (auto U : AI->users()) {
+      // Replace gets with the allocation.
+      if (isa<CoroAllocaGetInst>(U)) {
+        U->replaceAllUsesWith(Alloca);
+
+      // Replace frees with stackrestores. This is safe because
+      // alloca.alloc is required to obey a stack discipline, although we
+      // don't enforce that structurally.
+      } else {
+        auto FI = cast<CoroAllocaFreeInst>(U);
+        if (StackSave) {
+          Builder.SetInsertPoint(FI);
+          Builder.CreateCall(
+              Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+              StackSave);
+        }
+      }
+      cast<Instruction>(U)->eraseFromParent();
+    }
+
+    AI->eraseFromParent();
+  }
+}
+
+/// Turn the given coro.alloca.alloc call into a dynamic allocation.
+/// This happens during the all-instructions iteration, so it must not
+/// delete the call.
+static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
+                                        coro::Shape &Shape,
+                                        SmallVectorImpl<Instruction *> &DeadInsts) {
+  IRBuilder<> Builder(AI);
+  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
+
+  for (User *U : AI->users()) {
+    if (isa<CoroAllocaGetInst>(U)) {
+      U->replaceAllUsesWith(Alloc);
+    } else {
+      auto FI = cast<CoroAllocaFreeInst>(U);
+      Builder.SetInsertPoint(FI);
+      Shape.emitDealloc(Builder, Alloc, nullptr);
+    }
+    DeadInsts.push_back(cast<Instruction>(U));
+  }
+
+  // Push this on last so that it gets deleted after all the others.
+  DeadInsts.push_back(AI);
+
+  // Return the new allocation value so that we can check for needed spills.
+  return cast<Instruction>(Alloc);
+}
+
 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
   // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
   // access to local variables.
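The two lowering strategies above produce quite different code. Roughly, as a sketch based on the CHECK lines of the test below (value names hypothetical): an allocation whose get and free never cross a suspend becomes a plain dynamic alloca, bracketed by a stacksave/stackrestore pair only when localAllocaNeedsStackSave finds a free that doesn't immediately leave the resumption function, whereas a non-local allocation is routed through the allocator registered with llvm.coro.id.retcon (via Shape.emitAlloc/emitDealloc) so the memory survives the suspend.

    ; local: freed on every path before any suspend
    %ptr = alloca i8, i32 %size, align 8

    ; non-local: the pointer crosses a suspend
    %ptr = call i8* @allocate(i32 %size)   ; allocator from coro.id.retcon
    ; ...
    call void @deallocate(i8* %ptr)        ; emitted at each coro.alloca.free

Here @allocate and @deallocate are the names used by the test at the end of this patch.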
@@ -992,6 +1141,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
 
   IRBuilder<> Builder(F.getContext());
   SpillInfo Spills;
+  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
+  SmallVector<Instruction *, 4> DeadInstructions;
 
   for (int Repeat = 0; Repeat < 4; ++Repeat) {
     // See if there are materializable instructions across suspend points.
@@ -1021,12 +1172,35 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
       // of the Coroutine Frame.
       if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
         continue;
+
       // The Coroutine Promise always included into coroutine frame, no need to
       // check for suspend crossing.
       if (Shape.ABI == coro::ABI::Switch &&
           Shape.SwitchLowering.PromiseAlloca == &I)
         continue;
 
+      // Handle alloca.alloc specially here.
+      if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
+        // Check whether the alloca's lifetime is bounded by suspend points.
+        if (isLocalAlloca(AI)) {
+          LocalAllocas.push_back(AI);
+          continue;
+        }
+
+        // If not, do a quick rewrite of the alloca and then add spills of
+        // the rewritten value. The rewrite doesn't invalidate anything in
+        // Spills because the other alloca intrinsics have no other operands
+        // besides AI, and it doesn't invalidate the iteration because we
+        // delay erasing AI.
+        auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
+
+        for (User *U : Alloc->users()) {
+          if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
+            Spills.emplace_back(Alloc, U);
+        }
+        continue;
+      }
+
       for (User *U : I.users())
         if (Checker.isDefinitionAcrossSuspend(I, U)) {
           // We cannot spill a token.
@@ -1040,4 +1214,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
   moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
   Shape.FrameTy = buildFrameType(F, Shape, Spills);
   Shape.FramePtr = insertSpills(Spills, Shape);
+  lowerLocalAllocas(LocalAllocas);
+
+  for (auto I : DeadInstructions)
+    I->eraseFromParent();
 }
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 0a12e0f5cd0..de2d2920cb1 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -456,6 +456,60 @@ public:
   }
 };
 
+/// This represents the llvm.coro.alloca.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst {
+  enum { SizeArg, AlignArg };
+public:
+  Value *getSize() const {
+    return getArgOperand(SizeArg);
+  }
+  unsigned getAlignment() const {
+    return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This represents the llvm.coro.alloca.get instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst {
+  enum { AllocArg };
+public:
+  CoroAllocaAllocInst *getAlloc() const {
+    return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_alloca_get;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This represents the llvm.coro.alloca.free instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst {
+  enum { AllocArg };
+public:
+  CoroAllocaAllocInst *getAlloc() const {
+    return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_alloca_free;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 } // End namespace llvm.
 
 #endif
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index af27e54eed1..b81a08557b4 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -424,10 +424,15 @@ void coro::Shape::buildFrom(Function &F) {
 
       // Check that the result type of the suspend matches the resume types.
       Type *SResultTy = Suspend->getType();
-      ArrayRef<Type *> SuspendResultTys =
-        (isa<StructType>(SResultTy)
-           ? cast<StructType>(SResultTy)->elements()
-           : SResultTy); // forms an ArrayRef using SResultTy, be careful
+      ArrayRef<Type *> SuspendResultTys;
+      if (SResultTy->isVoidTy()) {
+        // leave as empty array
+      } else if (auto SResultStructTy = dyn_cast<StructType>(SResultTy)) {
+        SuspendResultTys = SResultStructTy->elements();
+      } else {
+        // forms an ArrayRef using SResultTy, be careful
+        SuspendResultTys = SResultTy;
+      }
       if (SuspendResultTys.size() != ResumeTys.size()) {
 #ifndef NDEBUG
         Suspend->dump();
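The explicit isVoidTy check matters for suspends that pass no values to their continuation: such a suspend has a void result type, and the removed code would have wrapped that void type in a one-element ArrayRef, which can never match an empty resume-type list. The @i coroutine in the new test exercises exactly this shape, since its continuation prototype takes only the frame pointer:

    declare {i8*, i32} @prototype_i(i8*)
    ; ...
    call void (...) @llvm.coro.suspend.retcon.isVoid(i32 %n.val)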
diff --git a/test/Transforms/Coroutines/coro-retcon-alloca.ll b/test/Transforms/Coroutines/coro-retcon-alloca.ll
new file mode 100644
index 00000000000..14d5474b8b2
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-retcon-alloca.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s
+
+target datalayout = "p:64:64:64"
+
+declare {i8*, i8*, i32} @prototype_f(i8*, i1)
+define {i8*, i8*, i32} @f(i8* %buffer, i32 %n) {
+entry:
+  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i8*, i32} (i8*, i1)* @prototype_f to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
+  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %n.val, i32 8)
+  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
+  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i8* %ptr, i32 %n.val)
+  call void @llvm.coro.alloca.free(token %alloca)
+  br i1 %unwind, label %cleanup, label %resume
+
+resume:
+  %inc = add i32 %n.val, 1
+  br label %loop
+
+cleanup:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  unreachable
+}
+
+; CHECK-LABEL: define { i8*, i8*, i32 } @f(i8* %buffer, i32 %n)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
+; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
+; CHECK-NEXT: [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n)
+; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %buffer, i64 8
+; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8**
+; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T1]], align 8
+; CHECK-NEXT: [[T0:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[ALLOC]], 1
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i8*, i32 } [[T0]], i32 %n, 2
+; CHECK-NEXT: ret { i8*, i8*, i32 } [[RET]]
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-NEXT: :
+; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %0, i64 8
+; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8**
+; CHECK-NEXT: [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8
+; CHECK-NEXT: tail call void @deallocate(i8* [[ALLOC]])
+; CHECK-NEXT: br i1 %1,
+
+declare {i8*, i32} @prototype_g(i8*, i1)
+define {i8*, i32} @g(i8* %buffer, i32 %n) {
+entry:
+  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*, i1)* @prototype_g to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
+  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %n.val, i32 8)
+  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
+  call void @use(i8* %ptr)
+  call void @llvm.coro.alloca.free(token %alloca)
+  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %n.val)
+  br i1 %unwind, label %cleanup, label %resume
+
+resume:
+  %inc = add i32 %n.val, 1
+  br label %loop
+
+cleanup:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  unreachable
+}
+
+; CHECK-LABEL: define { i8*, i32 } @g(i8* %buffer, i32 %n)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
+; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
+; CHECK-NEXT: [[T0:%.*]] = zext i32 %n to i64
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
+; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 %n, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-NEXT: :
+; CHECK-NEXT: br i1 %1,
+; CHECK: :
+; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to i32*
+; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4
+; CHECK-NEXT: %inc = add i32 [[T1]], 1
+; CHECK-NEXT: store i32 %inc, i32* [[T0]], align 4
+; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
+; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 %inc, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK: :
+; CHECK-NEXT: ret { i8*, i32 } { i8* null, i32 undef }
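Note how @g's allocation, being freed before the suspend on every path, lowers to a plain alloca in both the ramp function and @g.resume.0, with no heap traffic. lowerLocalAllocas emits the alloca with the original i32 size operand, roughly (a sketch, value names hypothetical):

    %ptr = alloca i8, i32 %n.val, align 8

The zext to i64 seen in the CHECK lines is, as far as the pipeline is concerned, later canonicalization of the array size to the pointer-width type under this datalayout. No stack save is emitted here because each free is immediately followed by a suspend.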
+
+declare {i8*, i32} @prototype_h(i8*, i1)
+define {i8*, i32} @h(i8* %buffer, i32 %n) {
+entry:
+  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*, i1)* @prototype_h to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
+  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %n.val)
+  br i1 %unwind, label %cleanup, label %resume
+
+resume:
+  %inc = add i32 %n.val, 1
+  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %inc, i32 8)
+  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
+  call void @use(i8* %ptr)
+  call void @llvm.coro.alloca.free(token %alloca)
+  br label %loop
+
+cleanup:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  unreachable
+}
+
+; CHECK-LABEL: define { i8*, i32 } @h(i8* %buffer, i32 %n)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
+; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %n, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull %0, i1 %1)
+; CHECK-NEXT: :
+; CHECK-NEXT: br i1 %1,
+; CHECK: :
+; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32*
+; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
+; CHECK-NEXT: %inc = add i32 [[T1]], 1
+; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
+; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
+; CHECK-NEXT: store i32 %inc, i32* [[NSLOT]], align 4
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %inc, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK: :
+; CHECK-NEXT: ret { i8*, i32 } { i8* null, i32 undef }
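In @g and @h, every coro.alloca.free is obviously followed by a suspend, so the lowered allocas need no stack bookkeeping. The @i test below is the contrasting case: the freeing block can branch back into loop2 without suspending, so lowerLocalAllocas must bracket the allocation with a save and restore of the stack pointer to keep the loop from growing the stack without bound. The lowered shape, per the CHECK lines that follow (value names hypothetical):

    %save = call i8* @llvm.stacksave()
    %ptr = alloca i8, i64 %size, align 8
    call void @use(i8* %ptr)
    call void @llvm.stackrestore(i8* %save)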
+
+declare {i8*, i32} @prototype_i(i8*)
+define {i8*, i32} @i(i8* %buffer, i32 %n) {
+entry:
+  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*)* @prototype_i to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %k, %loop2 ]
+  call void (...) @llvm.coro.suspend.retcon.isVoid(i32 %n.val)
+  %inc = add i32 %n.val, 1
+  br label %loop2
+
+loop2:
+  %k = phi i32 [ %inc, %loop ], [ %k2, %loop2 ]
+  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %k, i32 8)
+  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
+  call void @use(i8* %ptr)
+  call void @llvm.coro.alloca.free(token %alloca)
+  %k2 = lshr i32 %k, 1
+  %cmp = icmp ugt i32 %k, 128
+  br i1 %cmp, label %loop2, label %loop
+}
+
+; CHECK-LABEL: define { i8*, i32 } @i(i8* %buffer, i32 %n)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
+; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %n, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull %0)
+; CHECK-NEXT: :
+; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32*
+; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
+; CHECK-NEXT: %inc = add i32 [[T1]], 1
+; CHECK-NEXT: br label %loop2
+; CHECK: :
+; CHECK-NEXT: store i32 %k, i32* [[NSLOT]], align 4
+; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %k, 1
+; CHECK-NEXT: ret { i8*, i32 } [[RET]]
+; CHECK: loop2:
+; CHECK-NEXT: %k = phi i32 [ %inc, {{.*}} ], [ %k2, %loop2 ]
+; CHECK-NEXT: [[SAVE:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT: [[T0:%.*]] = zext i32 %k to i64
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
+; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
+; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVE]])
+; CHECK-NEXT: %cmp = icmp ugt i32 %k, 128
+; CHECK-NEXT: %k2 = lshr i32 %k, 1
+; CHECK-NEXT: br i1 %cmp, label %loop2,
+; CHECK-NEXT: }
+
+declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.suspend.retcon.i1(...)
+declare void @llvm.coro.suspend.retcon.isVoid(...)
+declare i1 @llvm.coro.end(i8*, i1)
+declare i8* @llvm.coro.prepare.retcon(i8*)
+declare token @llvm.coro.alloca.alloc.i32(i32, i32)
+declare i8* @llvm.coro.alloca.get(token)
+declare void @llvm.coro.alloca.free(token)
+
+declare noalias i8* @allocate(i32 %size)
+declare void @deallocate(i8* %ptr)
+
+declare void @print(i32)
+declare void @use(i8*)