[LICM] Allow promotion of some stores that are not guaranteed to execute.
Promotion is always legal when a store within the loop is guaranteed to execute. However, this is not a necessary condition - for promotion to be memory-model semantics-preserving, it is enough to have a store that dominates every exit block. This is because if the store dominates every exit block, the fact that an exit block was executed implies the original store was executed as well.

Differential Revision: https://reviews.llvm.org/D28147

llvm-svn: 291171
parent 61599008e2
commit 5863ebbfd3
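For intuition, here is a rough source-level analogue (in C++) of the @test7 IR added further down. The function and variable names are illustrative only; opaque and capture stand in for the external calls used by that test.

// Illustrative sketch only, not part of the commit.
int opaque(int);     // assumed external call: may throw, does not capture the pointer
void capture(int *); // assumed external call that escapes the pointer

int test7_like(unsigned n) {
  int local = 0;
  capture(&local);        // escaped, so not provably thread-local
  unsigned j = 0;
  do {
    int x = local;        // load inside the loop
    int x2 = opaque(x);   // may throw before the store on any iteration
    local = x2;           // store is NOT guaranteed to execute...
  } while (++j != n);     // ...but it dominates the loop's only exit,
  return local;           // so reaching this point implies it already ran
}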
@@ -912,14 +912,23 @@ bool llvm::promoteLoopAccessesToScalars(
   //
   // If at least one store is guaranteed to execute, both properties are
   // satisfied, and promotion is legal.
+  //
   // This, however, is not a necessary condition. Even if no store/load is
-  // guaranteed to execute, we can still establish these properties:
-  // (p1) by proving that hoisting the load into the preheader is
-  // safe (i.e. proving dereferenceability on all paths through the loop). We
+  // guaranteed to execute, we can still establish these properties.
+  // We can establish (p1) by proving that hoisting the load into the preheader
+  // is safe (i.e. proving dereferenceability on all paths through the loop). We
   // can use any access within the alias set to prove dereferenceability,
   // since they're all must alias.
-  // (p2) by proving the memory is thread-local, so the memory model
+  //
+  // There are two ways to establish (p2):
+  // a) Prove the location is thread-local. In this case the memory model
   // requirement does not apply, and stores are safe to insert.
+  // b) Prove a store dominates every exit block. In this case, if an exit
+  // block is reached, the original dynamic path would have taken us through
+  // the store, so inserting a store into the exit block is safe. Note that this
+  // is different from the store being guaranteed to execute. For instance,
+  // if an exception is thrown on the first iteration of the loop, the original
+  // store is never executed, but the exit blocks are not executed either.
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
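For intuition, a hand-written sketch (C++, illustrative only) of what promotion produces for a loop like @test7 further down once both properties hold: the load is hoisted into the preheader per (p1) and the store is sunk into the exit block per (p2b). The actual rewrite is performed on LLVM IR by the pass; the names here are made up.

int opaque(int);     // assumed external call that may throw
void capture(int *); // assumed external call that escapes the pointer

int test7_promoted(unsigned n) {
  int local = 0;
  capture(&local);
  int cur = local;       // (p1) load hoisted into the preheader
  unsigned j = 0;
  do {
    cur = opaque(cur);   // the loop now operates on a register only
  } while (++j != n);
  local = cur;           // (p2b) store sunk into the exit block: every path
  return local;          // reaching it already executed an original store
}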
@@ -1001,6 +1010,17 @@ bool llvm::promoteLoopAccessesToScalars(
           }
         }
 
+        // If a store dominates all exit blocks, it is safe to sink.
+        // As explained above, if an exit block was executed, a dominating
+        // store must have been executed at least once, so we are not
+        // introducing stores on paths that did not have them.
+        // Note that this only looks at explicit exit blocks. If we ever
+        // start sinking stores into unwind edges (see above), this will break.
+        if (!SafeToInsertStore)
+          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+            return DT->dominates(Store->getParent(), Exit);
+          });
+
         // If the store is not guaranteed to execute, we may still get
         // deref info through it.
         if (!DereferenceableInPH) {
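Condensing the above, here is a hypothetical helper (not the verbatim pass code; the function name and the two boolean parameters are invented for illustration) showing how the store-insertion side of promotion can be justified: by guaranteed execution (the pre-existing condition), or by either of the two ways of establishing (p2) described in the comment above.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool canInsertStoresOnExits(const StoreInst *Store,
                                   const DominatorTree &DT,
                                   ArrayRef<BasicBlock *> ExitBlocks,
                                   bool GuaranteedToExecute, // store runs on every iteration
                                   bool KnownThreadLocal) {  // location provably thread-local
  // (p2a) Thread-local memory: no other thread can observe the inserted
  // stores, so the memory-model requirement does not apply.
  if (KnownThreadLocal)
    return true;
  // A store guaranteed to execute already satisfies (p2): every path through
  // the loop stores, so exit-block stores add nothing new.
  if (GuaranteedToExecute)
    return true;
  // (p2b) New with this patch: if the store dominates every exit block,
  // reaching an exit implies the original store already executed, so the
  // sunk stores never appear on a path that had none.
  return llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
    return DT.dominates(Store->getParent(), Exit);
  });
}

As the comment in the diff notes, this reasons only about explicit exit blocks; unwind edges are not touched.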
@@ -186,6 +186,198 @@ for.end: ; preds = %for.cond.for.end_cr
 ; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
 }
 
+declare i32 @opaque(i32) argmemonly
+declare void @capture(i32*)
+
+; We can promote even if opaque may throw.
+define i32 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  call void @capture(i32* %local)
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; Make sure we don't promote if the store is really control-flow dependent.
+define i32 @test7bad() {
+; CHECK-LABEL: @test7bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: store i32 %x2, i32* %local
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  call void @capture(i32* %local)
+  br label %loop
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+  %cmp = icmp eq i32 %x2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  store i32 %x2, i32* %local
+  br label %else
+
+else:
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
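For contrast with @test7, a rough C++ rendering (illustrative only, not from the commit) of the @test7bad shape above: the store is control-flow dependent, so it does not dominate the exit, and sinking a store into the exit block would introduce stores on paths that never had one.

int opaque(int);     // assumed external call; may throw
void capture(int *); // assumed external call that escapes the pointer

int test7bad_like(unsigned n) {
  int local = 0;
  capture(&local);
  unsigned j = 0;
  do {
    int x2 = opaque(local);
    if (x2 == 0)
      local = x2;      // conditional store: does not dominate the exit
  } while (++j != n);
  return local;        // reachable without any store having executed
}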
+
+; Even if neither the load nor the store is guaranteed to execute because
+; opaque() may throw, we can still promote - the load not being guaranteed
+; doesn't block us, because %local is always dereferenceable.
+define i32 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  call void @capture(i32* %local)
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %throwaway = call i32 @opaque(i32 %j)
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x)
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; If the store is "guaranteed modulo exceptions", and the load depends on
+; control flow, we can only promote if the pointer is otherwise known to be
+; dereferenceable.
+define i32 @test9() {
+; CHECK-LABEL: @test9(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %else ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  call void @capture(i32* %local)
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %j2 = call i32 @opaque(i32 %j)
+  %cmp = icmp eq i32 %j2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %x = load i32, i32* %local
+  br label %else
+
+else:
+  %x2 = phi i32 [ 0, %loop ], [ %x, %if ]
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
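Source-level intuition (illustrative C++ only) for the dereferenceability side exercised by @test9 above and @test9bad below: when the load is behind a branch, promotion additionally needs the pointer to be known dereferenceable on its own, which holds for a plain alloca but not for a pointer offset by a runtime index.

int opaque(int);     // assumed external call; may throw
void capture(int *); // assumed external call that escapes the pointer

// Mirrors @test9: the conditional load is fine because &local (an alloca)
// is always dereferenceable; the unconditional store dominates the exit.
int test9_like(unsigned n) {
  int local = 0;
  capture(&local);
  unsigned j = 0;
  do {
    int x2 = 0;
    if (opaque(static_cast<int>(j)) == 0)
      x2 = local;      // conditional load from a known-dereferenceable slot
    local = x2;        // store still dominates the loop's only exit
  } while (++j != n);
  return local;
}
// @test9bad differs only in loading/storing through %local offset by a
// runtime index, which is not known dereferenceable, so it is not promoted.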
+
+define i32 @test9bad(i32 %i) {
+; CHECK-LABEL: @test9bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: call void @capture(i32* %local)
+; CHECK-NEXT: %notderef = getelementptr
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: load i32, i32* %notderef
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %notderef
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  call void @capture(i32* %local)
+  %notderef = getelementptr i32, i32* %local, i32 %i
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %j2 = call i32 @opaque(i32 %j)
+  %cmp = icmp eq i32 %j2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %x = load i32, i32* %notderef
+  br label %else
+
+else:
+  %x2 = phi i32 [ 0, %loop ], [ %x, %if ]
+  store i32 %x2, i32* %notderef
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %notderef
+  ret i32 %ret
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}