Add intrinsics for doing frame-bound dynamic allocations within a coroutine.

These rely on having an allocator provided to the coroutine and thus,
for now, only work in retcon lowerings.

llvm-svn: 368791
John McCall 2019-08-14 03:53:40 +00:00
parent 5b6ef04ee6
commit a6c3e5d871
5 changed files with 464 additions and 4 deletions
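
For orientation, the intended IR usage pattern (condensed from the new test at the end of this commit; %size and @use are illustrative stand-ins) pairs a token-producing allocation with a get and a free, and the lowering then decides whether the memory can live on the native stack or must come from the coroutine's allocator:

  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %size, i32 8) ; size, alignment
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  call void @use(i8* %ptr)
  call void @llvm.coro.alloca.free(token %alloca)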

include/llvm/IR/Intrinsics.td

@@ -985,6 +985,10 @@ def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>;
def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
                                        [IntrNoMem]>;
def int_coro_alloca_alloc : Intrinsic<[llvm_token_ty],
                                      [llvm_anyint_ty, llvm_i32_ty], []>;
def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>;
def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>;
def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty],
                               [IntrNoMem, ReadNone<0>, ReadNone<1>]>;
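
Only the size operand is overloaded (llvm_anyint_ty), so llvm.coro.alloca.alloc is mangled with the size type while the get and free intrinsics are not. Instantiated for an i32 size, as in the test below, the declarations read:

  declare token @llvm.coro.alloca.alloc.i32(i32, i32) ; (size, align) -> token
  declare i8* @llvm.coro.alloca.get(token)            ; token -> pointer to the memory
  declare void @llvm.coro.alloca.free(token)          ; ends the allocation's lifetime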

lib/Transforms/Coroutines/CoroFrame.cpp

@@ -960,6 +960,155 @@ static void splitAround(Instruction *I, const Twine &Name) {
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}

static bool isSuspendBlock(BasicBlock *BB) {
  return isa<AnyCoroSuspendInst>(BB->front());
}

typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;

/// Does control flow starting at the given block ever reach a suspend
/// instruction before reaching a block in VisitedOrFreeBBs?
static bool isSuspendReachableFrom(BasicBlock *From,
                                   VisitedBlocksSet &VisitedOrFreeBBs) {
  // Eagerly try to add this block to the visited set. If it's already
  // there, stop recursing; this path doesn't reach a suspend before
  // either looping or reaching a freeing block.
  if (!VisitedOrFreeBBs.insert(From).second)
    return false;

  // We assume that we'll already have split suspends into their own blocks.
  if (isSuspendBlock(From))
    return true;

  // Recurse on the successors.
  for (auto Succ : successors(From)) {
    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
      return true;
  }

  return false;
}

/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
/// suspend point?
static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that we won't pass them up.
  VisitedBlocksSet VisitedOrFreeBBs;
  for (auto User : AI->users()) {
    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
      VisitedOrFreeBBs.insert(FI->getParent());
  }

  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
}
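
Concretely, in the new test @g is local: its free executes before the suspend, so no suspend is reachable in between. @f is not, because the pointer is handed to the suspend itself. Condensed from the test:

  ; @f: non-local; a suspend sits between the get and the free
  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %n.val, i32 8)
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i8* %ptr, i32 %n.val)
  call void @llvm.coro.alloca.free(token %alloca)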

/// After we split the coroutine, will the given basic block be along
/// an obvious exit path for the resumption function?
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
                                              unsigned depth = 3) {
  // If we've bottomed out our depth count, stop searching and assume
  // that the path might loop back.
  if (depth == 0) return false;

  // If this is a suspend block, we're about to exit the resumption function.
  if (isSuspendBlock(BB)) return true;

  // Recurse into the successors.
  for (auto Succ : successors(BB)) {
    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
      return false;
  }

  // If none of the successors leads back in a loop, we're on an exit/abort.
  return true;
}

static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
  // Look for a free that isn't sufficiently obviously followed by
  // either a suspend or a termination, i.e. something that will leave
  // the coro resumption frame.
  for (auto U : AI->users()) {
    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
    if (!FI) continue;

    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
      return true;
  }

  // If we never found one, we don't need a stack save.
  return false;
}
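
The distinction is visible in the checks below: in @g the free is immediately followed by a suspend, so the dynamic alloca is emitted bare, while in @i the free is followed by a loop backedge, so the lowering brackets the alloca with stacksave/stackrestore to keep each iteration from growing the stack. From the @i checks (value names illustrative; the test matches them with patterns):

  %save = call i8* @llvm.stacksave()
  %mem = alloca i8, i64 %size, align 8
  call void @use(i8* nonnull %mem)
  call void @llvm.stackrestore(i8* %save)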

/// Turn each of the given local allocas into a normal (dynamic) alloca
/// instruction.
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas) {
  for (auto AI : LocalAllocas) {
    auto M = AI->getModule();
    IRBuilder<> Builder(AI);

    // Save the stack depth. Try to avoid doing this if the stackrestore
    // is going to immediately precede a return or something.
    Value *StackSave = nullptr;
    if (localAllocaNeedsStackSave(AI))
      StackSave = Builder.CreateCall(
                            Intrinsic::getDeclaration(M, Intrinsic::stacksave));

    // Allocate memory.
    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
    Alloca->setAlignment(AI->getAlignment());

    for (auto U : AI->users()) {
      // Replace gets with the allocation.
      if (isa<CoroAllocaGetInst>(U)) {
        U->replaceAllUsesWith(Alloca);

      // Replace frees with stackrestores. This is safe because
      // alloca.alloc is required to obey a stack discipline, although we
      // don't enforce that structurally.
      } else {
        auto FI = cast<CoroAllocaFreeInst>(U);
        if (StackSave) {
          Builder.SetInsertPoint(FI);
          Builder.CreateCall(
                    Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
                    StackSave);
        }
      }
      cast<Instruction>(U)->eraseFromParent();
    }

    AI->eraseFromParent();
  }
}
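
In the no-stack-save case the whole alloc/get/free cluster collapses to a bare dynamic alloca, as the @g checks show (again with illustrative value names):

  %size = zext i32 %n to i64
  %mem = alloca i8, i64 %size, align 8
  call void @use(i8* nonnull %mem)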

/// Turn the given coro.alloca.alloc call into a dynamic allocation.
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
                                        coro::Shape &Shape,
                                        SmallVectorImpl<Instruction*> &DeadInsts) {
  IRBuilder<> Builder(AI);
  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);

  for (User *U : AI->users()) {
    if (isa<CoroAllocaGetInst>(U)) {
      U->replaceAllUsesWith(Alloc);
    } else {
      auto FI = cast<CoroAllocaFreeInst>(U);
      Builder.SetInsertPoint(FI);
      Shape.emitDealloc(Builder, Alloc, nullptr);
    }
    DeadInsts.push_back(cast<Instruction>(U));
  }

  // Push this on last so that it gets deleted after all the others.
  DeadInsts.push_back(AI);

  // Return the new allocation value so that we can check for needed spills.
  return cast<Instruction>(Alloc);
}
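
Under the retcon ABI, Shape.emitAlloc and Shape.emitDealloc expand to calls of the allocator pair that was passed to llvm.coro.id.retcon, so a non-local allocation in @f becomes (illustrative names):

  %mem = call i8* @allocate(i32 %n.val) ; replaces the alloc/get pair
  ; ... %mem is spilled to the frame and stays live across the suspend ...
  call void @deallocate(i8* %mem)       ; replaces the free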

void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
  // access to local variables.
@@ -992,6 +1141,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
@@ -1021,12 +1172,35 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The coroutine promise is always included in the coroutine frame, so
    // there is no need to check it for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          Spills.emplace_back(Alloc, U);
      }
      continue;
    }

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
@@ -1040,4 +1214,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}

lib/Transforms/Coroutines/CoroInstr.h

@@ -456,6 +456,60 @@ public:
  }
};

/// This represents the llvm.coro.alloca.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst {
  enum { SizeArg, AlignArg };
public:
  Value *getSize() const {
    return getArgOperand(SizeArg);
  }
  unsigned getAlignment() const {
    return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
  }

  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc;
  }
  static bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This represents the llvm.coro.alloca.get instruction.
class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst {
  enum { AllocArg };
public:
  CoroAllocaAllocInst *getAlloc() const {
    return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
  }

  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_alloca_get;
  }
  static bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This represents the llvm.coro.alloca.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst {
  enum { AllocArg };
public:
  CoroAllocaAllocInst *getAlloc() const {
    return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg));
  }

  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_alloca_free;
  }
  static bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

} // End namespace llvm.

#endif

lib/Transforms/Coroutines/Coroutines.cpp

@@ -424,10 +424,15 @@ void coro::Shape::buildFrom(Function &F) {
       // Check that the result type of the suspend matches the resume types.
       Type *SResultTy = Suspend->getType();
-      ArrayRef<Type*> SuspendResultTys =
-        (isa<StructType>(SResultTy)
-           ? cast<StructType>(SResultTy)->elements()
-           : SResultTy); // forms an ArrayRef using SResultTy, be careful
+      ArrayRef<Type*> SuspendResultTys;
+      if (SResultTy->isVoidTy()) {
+        // leave as empty array
+      } else if (auto SResultStructTy = dyn_cast<StructType>(SResultTy)) {
+        SuspendResultTys = SResultStructTy->elements();
+      } else {
+        // forms an ArrayRef using SResultTy, be careful
+        SuspendResultTys = SResultTy;
+      }
       if (SuspendResultTys.size() != ResumeTys.size()) {
 #ifndef NDEBUG
         Suspend->dump();
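
The new void case matters for retcon suspends that receive no values at resume: SResultTy is then void, and the removed ternary would have formed a one-element ArrayRef from it, spuriously failing the arity check against an empty resume-type list. @i in the new test exercises this with a void-typed suspend variant:

  call void (...) @llvm.coro.suspend.retcon.isVoid(i32 %n.val)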

test/Transforms/Coroutines/coro-retcon-alloca.ll

@@ -0,0 +1,219 @@
; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s

target datalayout = "p:64:64:64"

declare {i8*, i8*, i32} @prototype_f(i8*, i1)

define {i8*, i8*, i32} @f(i8* %buffer, i32 %n) {
entry:
  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i8*, i32} (i8*, i1)* @prototype_f to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
  br label %loop

loop:
  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %n.val, i32 8)
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i8* %ptr, i32 %n.val)
  call void @llvm.coro.alloca.free(token %alloca)
  br i1 %unwind, label %cleanup, label %resume

resume:
  %inc = add i32 %n.val, 1
  br label %loop

cleanup:
  call i1 @llvm.coro.end(i8* %hdl, i1 0)
  unreachable
}

; CHECK-LABEL: define { i8*, i8*, i32 } @f(i8* %buffer, i32 %n)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
; CHECK-NEXT: [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n)
; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %buffer, i64 8
; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8**
; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T1]], align 8
; CHECK-NEXT: [[T0:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[ALLOC]], 1
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i8*, i32 } [[T0]], i32 %n, 2
; CHECK-NEXT: ret { i8*, i8*, i32 } [[RET]]
; CHECK-NEXT: }
; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull %0, i1 %1)
; CHECK-NEXT: :
; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %0, i64 8
; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i8**
; CHECK-NEXT: [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8
; CHECK-NEXT: tail call void @deallocate(i8* [[ALLOC]])
; CHECK-NEXT: br i1 %1,

declare {i8*, i32} @prototype_g(i8*, i1)

define {i8*, i32} @g(i8* %buffer, i32 %n) {
entry:
  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*, i1)* @prototype_g to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
  br label %loop

loop:
  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %n.val, i32 8)
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  call void @use(i8* %ptr)
  call void @llvm.coro.alloca.free(token %alloca)
  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %n.val)
  br i1 %unwind, label %cleanup, label %resume

resume:
  %inc = add i32 %n.val, 1
  br label %loop

cleanup:
  call i1 @llvm.coro.end(i8* %hdl, i1 0)
  unreachable
}

; CHECK-LABEL: define { i8*, i32 } @g(i8* %buffer, i32 %n)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
; CHECK-NEXT: [[T0:%.*]] = zext i32 %n to i64
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 %n, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK-NEXT: }
; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull %0, i1 %1)
; CHECK-NEXT: :
; CHECK-NEXT: br i1 %1,
; CHECK: :
; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to i32*
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4
; CHECK-NEXT: %inc = add i32 [[T1]], 1
; CHECK-NEXT: store i32 %inc, i32* [[T0]], align 4
; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 %inc, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK: :
; CHECK-NEXT: ret { i8*, i32 } { i8* null, i32 undef }

declare {i8*, i32} @prototype_h(i8*, i1)

define {i8*, i32} @h(i8* %buffer, i32 %n) {
entry:
  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*, i1)* @prototype_h to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
  br label %loop

loop:
  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
  %unwind = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %n.val)
  br i1 %unwind, label %cleanup, label %resume

resume:
  %inc = add i32 %n.val, 1
  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %inc, i32 8)
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  call void @use(i8* %ptr)
  call void @llvm.coro.alloca.free(token %alloca)
  br label %loop

cleanup:
  call i1 @llvm.coro.end(i8* %hdl, i1 0)
  unreachable
}

; CHECK-LABEL: define { i8*, i32 } @h(i8* %buffer, i32 %n)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %n, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK-NEXT: }
; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull %0, i1 %1)
; CHECK-NEXT: :
; CHECK-NEXT: br i1 %1,
; CHECK: :
; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32*
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
; CHECK-NEXT: %inc = add i32 [[T1]], 1
; CHECK-NEXT: [[T0:%.*]] = zext i32 %inc to i64
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
; CHECK-NEXT: store i32 %inc, i32* [[NSLOT]], align 4
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 %inc, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK: :
; CHECK-NEXT: ret { i8*, i32 } { i8* null, i32 undef }

declare {i8*, i32} @prototype_i(i8*)

define {i8*, i32} @i(i8* %buffer, i32 %n) {
entry:
  %id = call token @llvm.coro.id.retcon(i32 1024, i32 8, i8* %buffer, i8* bitcast ({i8*, i32} (i8*)* @prototype_i to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
  br label %loop

loop:
  %n.val = phi i32 [ %n, %entry ], [ %k, %loop2 ]
  call void (...) @llvm.coro.suspend.retcon.isVoid(i32 %n.val)
  %inc = add i32 %n.val, 1
  br label %loop2

loop2:
  %k = phi i32 [ %inc, %loop ], [ %k2, %loop2 ]
  %alloca = call token @llvm.coro.alloca.alloc.i32(i32 %k, i32 8)
  %ptr = call i8* @llvm.coro.alloca.get(token %alloca)
  call void @use(i8* %ptr)
  call void @llvm.coro.alloca.free(token %alloca)
  %k2 = lshr i32 %k, 1
  %cmp = icmp ugt i32 %k, 128
  br i1 %cmp, label %loop2, label %loop
}

; CHECK-LABEL: define { i8*, i32 } @i(i8* %buffer, i32 %n)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i32*
; CHECK-NEXT: store i32 %n, i32* [[T0]], align 4
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %n, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK-NEXT: }
; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull %0)
; CHECK-NEXT: :
; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32*
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 4
; CHECK-NEXT: %inc = add i32 [[T1]], 1
; CHECK-NEXT: br label %loop2
; CHECK: :
; CHECK-NEXT: store i32 %k, i32* [[NSLOT]], align 4
; CHECK-NEXT: [[RET:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 %k, 1
; CHECK-NEXT: ret { i8*, i32 } [[RET]]
; CHECK: loop2:
; CHECK-NEXT: %k = phi i32 [ %inc, {{.*}} ], [ %k2, %loop2 ]
; CHECK-NEXT: [[SAVE:%.*]] = call i8* @llvm.stacksave()
; CHECK-NEXT: [[T0:%.*]] = zext i32 %k to i64
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, i64 [[T0]], align 8
; CHECK-NEXT: call void @use(i8* nonnull [[ALLOC]])
; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVE]])
; CHECK-NEXT: %cmp = icmp ugt i32 %k, 128
; CHECK-NEXT: %k2 = lshr i32 %k, 1
; CHECK-NEXT: br i1 %cmp, label %loop2,
; CHECK-NEXT: }

declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.suspend.retcon.i1(...)
declare void @llvm.coro.suspend.retcon.isVoid(...)
declare i1 @llvm.coro.end(i8*, i1)
declare i8* @llvm.coro.prepare.retcon(i8*)
declare token @llvm.coro.alloca.alloc.i32(i32, i32)
declare i8* @llvm.coro.alloca.get(token)
declare void @llvm.coro.alloca.free(token)

declare noalias i8* @allocate(i32 %size)
declare void @deallocate(i8* %ptr)
declare void @print(i32)
declare void @use(i8*)