mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
70941d66be
A release fence acts as a publication barrier for stores within the current thread to become visible to other threads which might observe the release fence. It does not require the current thread to observe stores performed on other threads. As a result, we can allow store-load and load-load forwarding across a release fence. We choose to be much more conservative about stores. In theory, nothing prevents us from shifting a store from after a release fence to before it, and then eliminating the preceding (previously fenced) store. Doing this without actually moving the second store is likely also legal, but we chose to be conservative at this time. The LangRef indicates only atomic loads and stores are affected by fences. This patch chooses to be far more conservative than that. This is the GVN companion to http://reviews.llvm.org/D11434 which applied the same logic in EarlyCSE and has been baking in tree for a while now. Differential Revision: http://reviews.llvm.org/D11436 llvm-svn: 264472
70 lines
2.1 KiB
LLVM
70 lines
2.1 KiB
LLVM
; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
|
|
|
|
; We can value forward across the fence since we can (semantically)
|
|
; reorder the following load before the fence.
|
|
define i32 @test(i32* %addr.i) {
; Store-to-load case: 5 is stored to %addr.i, a release fence follows, and the
; same address is then reloaded and returned. The checks below require the
; store and the fence to remain in the output and no load to appear between
; the fence and the ret.
|
|
; CHECK-LABEL: @test
|
|
; CHECK: store
|
|
; CHECK: fence
|
|
; CHECK-NOT: load
|
|
; CHECK: ret
|
|
store i32 5, i32* %addr.i, align 4
|
|
fence release
|
|
; This reload of the just-stored address is the instruction the checks expect
; to be removed.
%a = load i32, i32* %addr.i, align 4
|
|
ret i32 %a
|
|
}
|
|
|
|
; Same as above
|
|
define i32 @test2(i32* %addr.i) {
; Load-to-load case: %addr.i is loaded, a release fence follows, and the same
; address is loaded a second time; the function returns the difference of the
; two loaded values. The NEXT-style check requires the fence to be on the line
; directly after the one matched by the label, so neither load may precede it,
; and the NOT-style check forbids any load between the fence and the ret.
|
|
; CHECK-LABEL: @test2
|
|
; CHECK-NEXT: fence
|
|
; CHECK-NOT: load
|
|
; CHECK: ret
|
|
%a = load i32, i32* %addr.i, align 4
|
|
fence release
|
|
; Second load of the same address, separated from the first only by the
; release fence.
%a2 = load i32, i32* %addr.i, align 4
|
|
%res = sub i32 %a, %a2
|
|
ret i32 %res
|
|
}
|
|
|
|
; We can not value forward across an acquire barrier since we might
|
|
; be synchronizing with another thread storing to the same variable
|
|
; followed by a release fence. This is not so much enforcing an
|
|
; ordering property (though it is that too), but a liveness
|
|
; property. We expect to eventually see the value of a store by
|
|
; another thread when spinning on that location.
|
|
define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
; Negative case: each load of %addr.i is preceded by an acquire fence. The
; checks below require a load, a fence, and another load to appear in that
; order, followed by a ret of %res, i.e. the second load must not be replaced
; by the first. %otheraddr is not referenced in the body.
|
|
; CHECK-LABEL: @test3
|
|
; CHECK: load
|
|
; CHECK: fence
|
|
; CHECK: load
|
|
; CHECK: ret i32 %res
|
|
; the following code is intended to model the unrolling of
|
|
; two iterations in a spin loop of the form:
|
|
; do { fence acquire; tmp = *%addr.i; } while (!tmp);
|
|
; It's hopefully clear that allowing PRE to turn this into:
|
|
; if (!*%addr.i) while(true) {} would be unfortunate
|
|
fence acquire
|
|
%a = load i32, i32* %addr.i, align 4
|
|
fence acquire
|
|
%a2 = load i32, i32* %addr.i, align 4
|
|
%res = sub i32 %a, %a2
|
|
ret i32 %res
|
|
}
|
|
|
|
; Another example of why forwarding across an acquire fence is problematic
|
|
; can be seen in a normal locking operation. Say we had:
|
|
; *p = 5; unlock(l); lock(l); use(p);
|
|
; forwarding the store to p would be invalid. A reasonable implementation
|
|
; of unlock and lock might be:
|
|
; unlock() { atomicrmw sub %l, 1 unordered; fence release }
|
|
; lock() {
|
|
; do {
|
|
; %res = cmpxchg %l, 0, 1, monotonic monotonic
|
|
; } while(!%res.success)
|
|
; fence acquire;
|
|
; }
|
|
; Given we chose to forward across the release fence, we clearly can't forward
|
|
; across the acquire fence as well.
|
|
|