1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[LICM][Coroutine] Don't sink stores from loops with coro.suspend instructions

See pr46990(https://bugs.llvm.org/show_bug.cgi?id=46990). LICM should not sink store instructions to loop exit blocks which cross coro.suspend intrinsics. This breaks semantic of coro.suspend intrinsic which return to caller directly. Also this leads to use-after-free if the coroutine is freed before control returns to the caller in multithread environment.

This patch disable promotion by check whether loop contains coro.suspend intrinsics.
This is a resubmit of D86190.
Disabling LICM for loops with coroutine suspension is a better option not only for correctness purpose but also for performance purpose.
In most cases LICM sinks memory operations. In the case of coroutine, sinking memory operation out of the loop does not improve performance since coroutien needs to get data from the frame anyway. In fact LICM would hurt coroutine performance since it adds more entries to the frame.

Differential Revision: https://reviews.llvm.org/D96928
This commit is contained in:
Xun Li 2021-03-03 15:21:57 -08:00
parent e0a172f86d
commit a7cf9dd738
4 changed files with 98 additions and 24 deletions

View File

@ -1760,6 +1760,14 @@ earlier passes.
Areas Requiring Attention
=========================
#. When coro.suspend returns -1, the coroutine is suspended, and it's possible
that the coroutine has already been destroyed (hence the frame has been freed).
We cannot access anything on the frame on the suspend path.
However there is nothing that prevents the compiler from moving instructions
along that path (e.g. LICM), which can lead to use-after-free. At the moment
we disabled LICM for loops that have coro.suspend, but the general problem still
exists and requires a general solution.
#. Take advantage of the lifetime intrinsics for the data that goes into the
coroutine frame. Leave lifetime intrinsics as is for the data that stays in
allocas.

View File

@ -362,6 +362,22 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<MemorySSAUpdater> MSSAU;
std::unique_ptr<SinkAndHoistLICMFlags> Flags;
// Don't sink stores from loops with coroutine suspend instructions.
// LICM would sink instructions into the default destination of
// the coroutine switch. The default destination of the switch is to
// handle the case where the coroutine is suspended, by which point the
// coroutine frame may have been destroyed. No instruction can be sunk there.
// FIXME: This would unfortunately hurt the performance of coroutines, however
// there is currently no general solution for this. Similar issues could also
// potentially happen in other passes where instructions are being moved
// across that edge.
bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
return llvm::any_of(*BB, [](Instruction &I) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
});
});
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
@ -408,7 +424,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
!Flags->tooManyMemoryAccesses()) {
!Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);

View File

@ -1,9 +1,25 @@
; Need to move users of allocas that were moved into the coroutine frame after
; coro.begin.
; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -O2 -enable-coroutines -S | FileCheck %s
; RUN: opt < %s -preserve-alignment-assumptions-during-inlining=false -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s
; RUN: opt < %s -coro-split -S | FileCheck %s
; RUN: opt < %s -passes=coro-split -S | FileCheck %s
define nonnull i8* @f(i32 %n) {
define nonnull i8* @f(i32 %n) "coroutine.presplit"="1" {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*))
; CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i32 24)
; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[CALL]])
; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame*
; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4
;
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null);
%n.addr = alloca i32
@ -45,24 +61,6 @@ entry:
call void @llvm.coro.resume(i8* %hdl)
call void @llvm.coro.destroy(i8* %hdl)
ret i32 0
; CHECK: call void @ctor
; CHECK-NEXT: %dec1.spill.addr.i = getelementptr inbounds i8, i8* %call.i, i64 20
; CHECK-NEXT: bitcast i8* %dec1.spill.addr.i to i32*
; CHECK-NEXT: store i32 4
; CHECK-NEXT: call void @print(i32 4)
; CHECK-NEXT: %index.addr12.i = getelementptr inbounds i8, i8* %call.i, i64 24
; CHECK-NEXT: bitcast i8* %index.addr12.i to i1*
; CHECK-NEXT: store i1 false
; CHECK-NEXT: store i32 3
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: store i32 3
; CHECK-NEXT: call void @print(i32 3)
; CHECK-NEXT: store i1 false
; CHECK-NEXT: store i32 2
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: store i32 2
; CHECK-NEXT: call void @print(i32 2)
; CHECK: ret i32 0
}
declare i8* @malloc(i32)

View File

@ -0,0 +1,52 @@
; Verifies that LICM is disabled for loops that contains coro.suspend.
; RUN: opt -S < %s -passes=licm | FileCheck %s
define i64 @licm(i64 %n) #0 {
; CHECK-LABEL: @licm(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P:%.*]] = alloca i64, align 8
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB0]] ], [ [[T5:%.*]], [[AWAIT_READY:%.*]] ]
; CHECK-NEXT: [[T5]] = add i64 [[I]], 1
; CHECK-NEXT: [[SUSPEND:%.*]] = call i8 @llvm.coro.suspend(token none, i1 false)
; CHECK-NEXT: switch i8 [[SUSPEND]], label [[BB2:%.*]] [
; CHECK-NEXT: i8 0, label [[AWAIT_READY]]
; CHECK-NEXT: ]
; CHECK: await.ready:
; CHECK-NEXT: store i64 1, i64* [[P]], align 4
; CHECK-NEXT: [[T6:%.*]] = icmp ult i64 [[T5]], [[N:%.*]]
; CHECK-NEXT: br i1 [[T6]], label [[LOOP]], label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[RES:%.*]] = call i1 @llvm.coro.end(i8* null, i1 false)
; CHECK-NEXT: ret i64 0
;
entry:
%p = alloca i64
br label %bb0
bb0:
br label %loop
loop:
%i = phi i64 [ 0, %bb0 ], [ %t5, %await.ready ]
%t5 = add i64 %i, 1
%suspend = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %suspend, label %bb2 [
i8 0, label %await.ready
]
await.ready:
store i64 1, i64* %p
%t6 = icmp ult i64 %t5, %n
br i1 %t6, label %loop, label %bb2
bb2:
%res = call i1 @llvm.coro.end(i8* null, i1 false)
ret i64 0
}
declare i8 @llvm.coro.suspend(token, i1)
declare i1 @llvm.coro.end(i8*, i1)