1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

AMDGPU: Add support for cross address space synchronization scopes

Differential Revision: https://reviews.llvm.org/D59517

llvm-svn: 356946
This commit is contained in:
Konstantin Zhuravlyov 2019-03-25 20:50:21 +00:00
parent a23410c78e
commit 5a27d2d078
19 changed files with 2683 additions and 481 deletions

View File

@ -323,62 +323,80 @@ is conservatively correct for OpenCL.
.. table:: AMDHSA LLVM Sync Scopes
:name: amdgpu-amdhsa-llvm-sync-scopes-table
================ ==========================================================
LLVM Sync Scope Description
================ ==========================================================
*none* The default: ``system``.
======================= ===================================================
LLVM Sync Scope Description
======================= ===================================================
*none* The default: ``system``.
Synchronizes with, and participates in modification and
seq_cst total orderings with, other operations (except
image operations) for all address spaces (except private,
or generic that accesses private) provided the other
operation's sync scope is:
Synchronizes with, and participates in modification
and seq_cst total orderings with, other operations
(except image operations) for all address spaces
(except private, or generic that accesses private)
provided the other operation's sync scope is:
- ``system``.
- ``agent`` and executed by a thread on the same agent.
- ``workgroup`` and executed by a thread in the same
workgroup.
- ``wavefront`` and executed by a thread in the same
wavefront.
- ``system``.
- ``agent`` and executed by a thread on the same
agent.
- ``workgroup`` and executed by a thread in the
same workgroup.
- ``wavefront`` and executed by a thread in the
same wavefront.
``agent`` Synchronizes with, and participates in modification and
seq_cst total orderings with, other operations (except
image operations) for all address spaces (except private,
or generic that accesses private) provided the other
operation's sync scope is:
``agent`` Synchronizes with, and participates in modification
and seq_cst total orderings with, other operations
(except image operations) for all address spaces
(except private, or generic that accesses private)
provided the other operation's sync scope is:
- ``system`` or ``agent`` and executed by a thread on the
same agent.
- ``workgroup`` and executed by a thread in the same
workgroup.
- ``wavefront`` and executed by a thread in the same
wavefront.
- ``system`` or ``agent`` and executed by a thread
on the same agent.
- ``workgroup`` and executed by a thread in the
same workgroup.
- ``wavefront`` and executed by a thread in the
same wavefront.
``workgroup`` Synchronizes with, and participates in modification and
seq_cst total orderings with, other operations (except
image operations) for all address spaces (except private,
or generic that accesses private) provided the other
operation's sync scope is:
``workgroup`` Synchronizes with, and participates in modification
and seq_cst total orderings with, other operations
(except image operations) for all address spaces
(except private, or generic that accesses private)
provided the other operation's sync scope is:
- ``system``, ``agent`` or ``workgroup`` and executed by a
thread in the same workgroup.
- ``wavefront`` and executed by a thread in the same
wavefront.
- ``system``, ``agent`` or ``workgroup`` and
executed by a thread in the same workgroup.
- ``wavefront`` and executed by a thread in the
same wavefront.
``wavefront`` Synchronizes with, and participates in modification and
seq_cst total orderings with, other operations (except
image operations) for all address spaces (except private,
or generic that accesses private) provided the other
operation's sync scope is:
``wavefront`` Synchronizes with, and participates in modification
and seq_cst total orderings with, other operations
(except image operations) for all address spaces
(except private, or generic that accesses private)
provided the other operation's sync scope is:
- ``system``, ``agent``, ``workgroup`` or ``wavefront``
and executed by a thread in the same wavefront.
- ``system``, ``agent``, ``workgroup`` or
``wavefront`` and executed by a thread in the
same wavefront.
``singlethread`` Only synchronizes with, and participates in modification
and seq_cst total orderings with, other operations (except
image operations) running in the same thread for all
address spaces (for example, in signal handlers).
================ ==========================================================
``singlethread`` Only synchronizes with, and participates in
modification and seq_cst total orderings with,
other operations (except image operations) running
in the same thread for all address spaces (for
example, in signal handlers).
``one-as`` Same as ``system`` but only synchronizes with other
operations within the same address space.
``agent-one-as`` Same as ``agent`` but only synchronizes with other
operations within the same address space.
``workgroup-one-as`` Same as ``workgroup`` but only synchronizes with
other operations within the same address space.
``wavefront-one-as`` Same as ``wavefront`` but only synchronizes with
other operations within the same address space.
``singlethread-one-as`` Same as ``singlethread`` but only synchronizes with
other operations within the same address space.
======================= ===================================================
AMDGPU Intrinsics
-----------------

View File

@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
AgentSSID = CTX.getOrInsertSyncScopeID("agent");
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
SystemOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("one-as");
AgentOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("agent-one-as");
WorkgroupOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("workgroup-one-as");
WavefrontOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("wavefront-one-as");
SingleThreadOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("singlethread-one-as");
}
} // end namespace llvm

View File

@ -29,12 +29,22 @@ private:
// All supported memory/synchronization scopes can be found here:
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
/// Agent synchronization scope ID.
/// Agent synchronization scope ID (cross address space).
SyncScope::ID AgentSSID;
/// Workgroup synchronization scope ID.
/// Workgroup synchronization scope ID (cross address space).
SyncScope::ID WorkgroupSSID;
/// Wavefront synchronization scope ID.
/// Wavefront synchronization scope ID (cross address space).
SyncScope::ID WavefrontSSID;
/// System synchronization scope ID (single address space).
SyncScope::ID SystemOneAddressSpaceSSID;
/// Agent synchronization scope ID (single address space).
SyncScope::ID AgentOneAddressSpaceSSID;
/// Workgroup synchronization scope ID (single address space).
SyncScope::ID WorkgroupOneAddressSpaceSSID;
/// Wavefront synchronization scope ID (single address space).
SyncScope::ID WavefrontOneAddressSpaceSSID;
/// Single thread synchronization scope ID (single address space).
SyncScope::ID SingleThreadOneAddressSpaceSSID;
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@ -43,35 +53,70 @@ private:
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
/// supported by the AMDGPU target.
///
/// A cross address space scope and its single address space counterpart
/// share the same ordering value: 0 for single thread, 1 for wavefront,
/// 2 for workgroup, 3 for agent and 4 for system.
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
  // Each test accepts both flavours of a scope; only the scope's size
  // matters for the ordering, not its address space restriction.
  if (SSID == SyncScope::SingleThread ||
      SSID == getSingleThreadOneAddressSpaceSSID())
    return 0;
  else if (SSID == getWavefrontSSID() ||
           SSID == getWavefrontOneAddressSpaceSSID())
    return 1;
  else if (SSID == getWorkgroupSSID() ||
           SSID == getWorkgroupOneAddressSpaceSSID())
    return 2;
  else if (SSID == getAgentSSID() ||
           SSID == getAgentOneAddressSpaceSSID())
    return 3;
  else if (SSID == SyncScope::System ||
           SSID == getSystemOneAddressSpaceSSID())
    return 4;

  // Any other sync scope ID is not supported.
  return None;
}
/// Checks \p SSID against the single address space synchronization scopes.
///
/// \returns True if \p SSID is the single address space variant of the
/// system, agent, workgroup, wavefront or single thread scope, false
/// otherwise.
bool isOneAddressSpace(SyncScope::ID SSID) const {
  const SyncScope::ID OneAddressSpaceSSIDs[] = {
      getSystemOneAddressSpaceSSID(),
      getAgentOneAddressSpaceSSID(),
      getWorkgroupOneAddressSpaceSSID(),
      getWavefrontOneAddressSpaceSSID(),
      getSingleThreadOneAddressSpaceSSID()};

  for (SyncScope::ID Candidate : OneAddressSpaceSSIDs)
    if (SSID == Candidate)
      return true;

  return false;
}
public:
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
/// \returns Agent synchronization scope ID (cross address space). This is
/// the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("agent").
SyncScope::ID getAgentSSID() const {
return AgentSSID;
}
/// \returns Workgroup synchronization scope ID (cross address space). This
/// is the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("workgroup").
SyncScope::ID getWorkgroupSSID() const {
return WorkgroupSSID;
}
/// \returns Wavefront synchronization scope ID (cross address space). This
/// is the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("wavefront").
SyncScope::ID getWavefrontSSID() const {
return WavefrontSSID;
}
/// \returns System synchronization scope ID (single address space). This is
/// the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("one-as").
SyncScope::ID getSystemOneAddressSpaceSSID() const {
return SystemOneAddressSpaceSSID;
}
/// \returns Agent synchronization scope ID (single address space). This is
/// the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("agent-one-as").
SyncScope::ID getAgentOneAddressSpaceSSID() const {
return AgentOneAddressSpaceSSID;
}
/// \returns Workgroup synchronization scope ID (single address space). This
/// is the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("workgroup-one-as").
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
return WorkgroupOneAddressSpaceSSID;
}
/// \returns Wavefront synchronization scope ID (single address space). This
/// is the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("wavefront-one-as").
SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
return WavefrontOneAddressSpaceSSID;
}
/// \returns Single thread synchronization scope ID (single address space).
/// This is the ID the AMDGPUMachineModuleInfo constructor obtains from
/// getOrInsertSyncScopeID("singlethread-one-as").
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
return SingleThreadOneAddressSpaceSSID;
}
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@ -87,7 +132,11 @@ public:
if (!AIO || !BIO)
return None;
return AIO.getValue() > BIO.getValue();
bool IsAOneAddressSpace = isOneAddressSpace(A);
bool IsBOneAddressSpace = isOneAddressSpace(B);
return AIO.getValue() >= BIO.getValue() &&
(IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
}
};

View File

@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
/// Translates the LLVM synchronization scope \p SSID into the SI atomic
/// scope, the address spaces to be ordered, and a flag telling whether the
/// scope is one of the cross address space scopes.
///
/// \param SSID synchronization scope ID to translate.
/// \param InstrScope address spaces of the instruction; only used to narrow
/// the ordered address spaces for the single address space ("one-as")
/// scopes.
/// \returns (scope, SIAtomicAddrSpace::ATOMIC, true) for the cross address
/// space scopes, (scope, SIAtomicAddrSpace::ATOMIC & InstrScope, false) for
/// the single address space scopes, or "None" if \p SSID is neither.
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrScope) const {
  // Cross address space scopes: all atomic address spaces are ordered.
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC,
                           true);

  // Single address space scopes: the ordered address spaces are masked with
  // InstrScope. Each check stands on its own; it must not be nested under
  // the matching cross address space check above, which could never hold
  // for the same SSID.
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);

  // Unknown synchronization scope.
  return None;
}
@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VMCnt = false;
bool LGKMCnt = false;
bool EXPCnt = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
VMCnt = true;
VMCnt |= true;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/GDS memory as LDS operations
// could be reordered with respect to later global/GDS memory
// operations of the same wave.
LGKMCnt = IsCrossAddrSpaceOrdering;
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/LDS memory as GDS operations
// could be reordered with respect to later global/LDS memory
// operations of the same wave.
EXPCnt = IsCrossAddrSpaceOrdering;
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
}
}
if (VMCnt || LGKMCnt || EXPCnt) {
if (VMCnt || LGKMCnt) {
unsigned WaitCntImmediate =
AMDGPU::encodeWaitcnt(IV,
VMCnt ? 0 : getVmcntBitMask(IV),
EXPCnt ? 0 : getExpcntBitMask(IV),
getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
Changed = true;

View File

@ -12,8 +12,10 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v1, v2
; GCN-NEXT: v_or_b32_e32 v1, -5, v1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]

View File

@ -27,9 +27,9 @@ bb:
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp
%tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4
fence syncscope("workgroup") release
fence syncscope("workgroup-one-as") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
fence syncscope("workgroup-one-as") acquire
%tmp4 = add nsw i32 %tmp3, %tmp3
%tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
%tmp6 = add nsw i32 %tmp5, %tmp4

View File

@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
@ -49,6 +49,27 @@ define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace
ret void
}
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
%a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
%a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
%a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 syncscope("one-as") seq_cst
store float %a3, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64

View File

@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
@ -18,7 +18,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@ -29,7 +29,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
@ -41,7 +41,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
@ -50,6 +50,53 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}singlethread_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
@ -90,10 +137,50 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@ -105,7 +192,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@ -116,7 +203,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
@ -128,7 +215,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
@ -137,9 +224,56 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@ -150,7 +284,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@ -161,7 +295,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@ -172,7 +306,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {
@ -181,6 +315,50 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}wavefront_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
@ -220,3 +398,43 @@ entry:
fence syncscope("wavefront") seq_cst
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; FUNC-LABEL: {{^}}system_acquire:
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}}
@ -10,6 +10,232 @@
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
entry:
fence acquire
@ -19,7 +245,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@ -30,7 +256,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@ -43,7 +269,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@ -96,9 +322,9 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@ -110,7 +336,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@ -121,7 +347,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@ -134,7 +360,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@ -146,7 +372,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@ -157,7 +383,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@ -168,7 +394,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@ -179,7 +405,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {

View File

@ -104,7 +104,7 @@ body: |
S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
S_WAITCNT 3952
BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep)
BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from %ir.gep)
bb.2.exit:
liveins: $sgpr2_sgpr3

View File

@ -1,11 +1,311 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: {{^}}system_monotonic:
; atomicrmw volatile xchg on i32* %out with syncscope("one-as"), monotonic.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("one-as"), acquire.
; Expected output: no line ending in `s_waitcnt vmcnt(0)` before the
; flat_atomic_swap; the swap is immediately followed by `s_waitcnt vmcnt(0)`
; and then, under the GFX8 prefix, by buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("one-as"), release.
; Expected output: `s_waitcnt vmcnt(0)` on the line immediately before the
; flat_atomic_swap, and neither a line ending in `s_waitcnt vmcnt(0)` nor
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("one-as"), acq_rel.
; Expected output, on consecutive lines: `s_waitcnt vmcnt(0)`, the
; flat_atomic_swap, `s_waitcnt vmcnt(0)` again, and (under the GFX8 prefix)
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}system_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("one-as"), seq_cst.
; Expected output, on consecutive lines: `s_waitcnt vmcnt(0)`, the
; flat_atomic_swap, `s_waitcnt vmcnt(0)` again, and (under the GFX8 prefix)
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("singlethread-one-as"),
; monotonic.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("singlethread-one-as"),
; acquire.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("singlethread-one-as"),
; release.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("singlethread-one-as"),
; acq_rel.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("singlethread-one-as"),
; seq_cst.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("agent-one-as"),
; monotonic.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("agent-one-as"), acquire.
; Expected output: no line ending in `s_waitcnt vmcnt(0)` before the
; flat_atomic_swap; the swap is immediately followed by `s_waitcnt vmcnt(0)`
; and then, under the GFX8 prefix, by buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("agent-one-as"), release.
; Expected output: `s_waitcnt vmcnt(0)` on the line immediately before the
; flat_atomic_swap, and neither a line ending in `s_waitcnt vmcnt(0)` nor
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("agent-one-as"), acq_rel.
; Expected output, on consecutive lines: `s_waitcnt vmcnt(0)`, the
; flat_atomic_swap, `s_waitcnt vmcnt(0)` again, and (under the GFX8 prefix)
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("agent-one-as"), seq_cst.
; Expected output, on consecutive lines: `s_waitcnt vmcnt(0)`, the
; flat_atomic_swap, `s_waitcnt vmcnt(0)` again, and (under the GFX8 prefix)
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("workgroup-one-as"),
; monotonic.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("workgroup-one-as"),
; acquire.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before it; the checks after the swap (no such wait, no
; buffer_wbinvl1_vol) are written under the GFX8 prefix.
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("workgroup-one-as"),
; release.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before it (checked under the GFX8 prefix) or after it,
; and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("workgroup-one-as"),
; acq_rel.
; Expected output: a flat_atomic_swap; under the GFX8 prefix, no line ending in
; `s_waitcnt vmcnt(0)` before or after it and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("workgroup-one-as"),
; seq_cst.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before it (checked under the GFX8 prefix) or after it,
; and no buffer_wbinvl1_vol after it (also under the GFX8 prefix).
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("wavefront-one-as"),
; monotonic.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("wavefront-one-as"),
; acquire.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("wavefront-one-as"),
; release.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("wavefront-one-as"),
; acq_rel.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
ret void
}
; atomicrmw volatile xchg on i32* %out with syncscope("wavefront-one-as"),
; seq_cst.
; Expected output: a flat_atomic_swap with no line ending in
; `s_waitcnt vmcnt(0)` before or after it, and no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
ret void
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic(
i32* %out, i32 %in) {
entry:
@ -14,9 +314,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire(
i32* %out, i32 %in) {
@ -26,9 +326,9 @@ entry:
}
; GCN-LABEL: {{^}}system_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release(
i32* %out, i32 %in) {
@ -38,9 +338,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel(
i32* %out, i32 %in) {
@ -50,9 +350,9 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst(
i32* %out, i32 %in) {
@ -62,9 +362,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic(
i32* %out, i32 %in) {
@ -74,9 +374,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire(
i32* %out, i32 %in) {
@ -86,9 +386,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release(
i32* %out, i32 %in) {
@ -98,9 +398,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel(
i32* %out, i32 %in) {
@ -110,9 +410,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst(
i32* %out, i32 %in) {
@ -122,9 +422,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic(
i32* %out, i32 %in) {
@ -134,9 +434,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire(
i32* %out, i32 %in) {
@ -146,9 +446,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release(
i32* %out, i32 %in) {
@ -158,9 +458,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel(
i32* %out, i32 %in) {
@ -170,9 +470,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst(
i32* %out, i32 %in) {
@ -182,9 +482,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic(
i32* %out, i32 %in) {
@ -194,9 +494,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire(
i32* %out, i32 %in) {
@ -206,9 +506,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_release:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release(
i32* %out, i32 %in) {
@ -218,9 +518,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acq_rel:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel(
i32* %out, i32 %in) {
@ -230,9 +530,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst(
i32* %out, i32 %in) {
@ -242,9 +542,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic(
i32* %out, i32 %in) {
@ -254,9 +554,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire(
i32* %out, i32 %in) {
@ -266,9 +566,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release(
i32* %out, i32 %in) {
@ -278,9 +578,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel(
i32* %out, i32 %in) {
@ -290,9 +590,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst(
i32* %out, i32 %in) {

View File

@ -11,7 +11,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load seq_cst 4 from `i32 addrspace(42)* undef`)
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`)
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@ -47,7 +47,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@ -63,7 +63,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`)
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...

View File

@ -5,282 +5,282 @@
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_unordered:
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
define amdgpu_kernel void @system_one_as_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in unordered, align 4
%val = load atomic i32, i32* %in syncscope("one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
define amdgpu_kernel void @system_one_as_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in monotonic, align 4
%val = load atomic i32, i32* %in syncscope("one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_acquire:
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
define amdgpu_kernel void @system_one_as_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in acquire, align 4
%val = load atomic i32, i32* %in syncscope("one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in seq_cst, align 4
%val = load atomic i32, i32* %in syncscope("one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
define amdgpu_kernel void @singlethread_one_as_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}agent_unordered:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
define amdgpu_kernel void @agent_one_as_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
%val = load atomic i32, i32* %in syncscope("agent-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
%val = load atomic i32, i32* %in syncscope("agent-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `acquire` ordering at agent scope; the
; checks expect a `glc` load followed immediately by "s_waitcnt vmcnt(0)" and
; then (GFX89 prefix) buffer_wbinvl1_vol, with no such wait before the load.
; GCN-LABEL: {{^}}agent_acquire:
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
define amdgpu_kernel void @agent_one_as_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
%val = load atomic i32, i32* %in syncscope("agent-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `seq_cst` ordering at agent scope; the
; checks expect "s_waitcnt vmcnt(0)", then on consecutive lines the `glc`
; load, another "s_waitcnt vmcnt(0)" and (GFX89 prefix) buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
%val = load atomic i32, i32* %in syncscope("agent-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `unordered` ordering at workgroup scope;
; the checks forbid a bare "s_waitcnt vmcnt(0)" on either side of the load and
; forbid buffer_wbinvl1_vol under the GFX89 prefix.
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
define amdgpu_kernel void @workgroup_one_as_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `monotonic` ordering at workgroup scope;
; the checks expect a load without `glc` (GFX89 prefix), no bare
; "s_waitcnt vmcnt(0)" on either side of it, and no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `acquire` ordering at workgroup scope;
; the checks expect a load without `glc`, no bare "s_waitcnt vmcnt(0)" on
; either side of it, and no buffer_wbinvl1_vol (the post-load checks use the
; GFX89 prefix only).
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `seq_cst` ordering at workgroup scope;
; under the GFX89 prefix the checks expect a load without `glc`, no bare
; "s_waitcnt vmcnt(0)" on either side of it, and no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `unordered` ordering at wavefront scope;
; the checks forbid a bare "s_waitcnt vmcnt(0)" on either side of the load and
; forbid buffer_wbinvl1_vol under the GFX89 prefix.
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
define amdgpu_kernel void @wavefront_one_as_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `monotonic` ordering at wavefront scope;
; the checks expect a load without `glc`, no bare "s_waitcnt vmcnt(0)" on
; either side of it, and no buffer_wbinvl1_vol under the GFX89 prefix.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `acquire` ordering at wavefront scope;
; the checks expect a load without `glc`, no bare "s_waitcnt vmcnt(0)" on
; either side of it, and no buffer_wbinvl1_vol under the GFX89 prefix.
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
; NOTE(review): this span looks like a diff rendering with the +/- markers
; stripped. The label, `define` and `%val` lines each appear twice, presumably
; the pre-change line followed by its "-one-as" replacement -- confirm against
; the real test file before treating this as compilable IR.
; Both variants load atomically with `seq_cst` ordering at wavefront scope;
; the checks expect a load without `glc`, no bare "s_waitcnt vmcnt(0)" on
; either side of it, and no buffer_wbinvl1_vol under the GFX89 prefix.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
@ -374,4 +374,284 @@ entry:
ret void
}
; Kernel under test: `unordered` atomic load with no syncscope (the default,
; system, scope) from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (the pattern is
;     anchored at end of line, so a `glc` modifier would not match);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in unordered, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `monotonic` atomic load with no syncscope (the default,
; system, scope) from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier (checked under the GFX89
;     prefix only);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `acquire` atomic load with no syncscope (the default,
; system, scope) from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier;
;   - on the very next line, a combined "s_waitcnt vmcnt(0) lgkmcnt(0)";
;   - on the line after that (GFX89 prefix), buffer_wbinvl1_vol;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in acquire, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `seq_cst` atomic load with no syncscope (the default,
; system, scope) from %in, followed by a plain store of the value to %out.
; Expected output, on consecutive lines once the first wait is found:
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier;
;   - another combined "s_waitcnt vmcnt(0) lgkmcnt(0)";
;   - buffer_wbinvl1_vol (GFX89 prefix);
; and later a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `unordered` atomic load with syncscope("singlethread")
; from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `monotonic` atomic load with syncscope("singlethread")
; from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `acquire` atomic load with syncscope("singlethread")
; from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `seq_cst` atomic load with syncscope("singlethread")
; from %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `unordered` atomic load with syncscope("agent") from %in,
; followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `monotonic` atomic load with syncscope("agent") from %in,
; followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier (checked under the GFX89
;     prefix only);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `acquire` atomic load with syncscope("agent") from %in,
; followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier;
;   - on the very next line, a combined "s_waitcnt vmcnt(0) lgkmcnt(0)";
;   - on the line after that (GFX89 prefix), buffer_wbinvl1_vol;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `seq_cst` atomic load with syncscope("agent") from %in,
; followed by a plain store of the value to %out.
; Expected output, on consecutive lines once the first wait is found:
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword carrying the `glc` modifier;
;   - another combined "s_waitcnt vmcnt(0) lgkmcnt(0)";
;   - buffer_wbinvl1_vol (GFX89 prefix);
; and later a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `unordered` atomic load with syncscope("workgroup") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `monotonic` atomic load with syncscope("workgroup") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand, i.e. no `glc`
;     (checked under the GFX89 prefix only);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `acquire` atomic load with syncscope("workgroup") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand, i.e. no `glc`
;     (GFX89 prefix);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load
;     (GFX89 prefix);
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `seq_cst` atomic load with syncscope("workgroup") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order (all but the final store checked under the GFX89
; prefix only):
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `unordered` atomic load with syncscope("wavefront") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `monotonic` atomic load with syncscope("wavefront") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `acquire` atomic load with syncscope("wavefront") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
store i32 %val, i32* %out
ret void
}
; Kernel under test: `seq_cst` atomic load with syncscope("wavefront") from
; %in, followed by a plain store of the value to %out.
; Expected output, in order:
;   - no combined "s_waitcnt vmcnt(0) lgkmcnt(0)" before the load;
;   - a flat_load_dword with nothing after its address operand (no `glc`);
;   - a combined "s_waitcnt vmcnt(0) lgkmcnt(0)" somewhere after the load;
;   - no buffer_wbinvl1_vol under the GFX89 prefix;
;   - a flat_store_dword of the register captured from the load.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
!0 = !{i32 1}

View File

@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(3)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -784,7 +784,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -806,7 +806,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -828,7 +828,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -850,7 +850,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -872,7 +872,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -894,7 +894,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -916,7 +916,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -938,7 +938,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -960,7 +960,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -982,7 +982,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -1004,7 +1004,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -1026,7 +1026,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@ -1048,7 +1048,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...

View File

@ -3,12 +3,230 @@
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
; FUNC-LABEL: {{^}}system_acquire:
; Kernel whose only operation is `fence syncscope("one-as") acquire`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, with
; `buffer_wbinvl1` on the very next line, then `s_endpgm`. The text
; ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; Kernel whose only operation is `fence syncscope("one-as") release`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, then
; `s_endpgm`. The text ATOMIC_FENCE must not appear between %bb.0 and the
; s_waitcnt. `buffer_wbinvl1` is neither required nor forbidden here.
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; Kernel whose only operation is `fence syncscope("one-as") acq_rel`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, later a
; line ending in `buffer_wbinvl1` (not required to be adjacent), then
; `s_endpgm`. ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; Kernel whose only operation is `fence syncscope("one-as") seq_cst`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, later a
; line ending in `buffer_wbinvl1` (not required to be adjacent), then
; `s_endpgm`. ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; Kernel whose only operation is `fence syncscope("singlethread-one-as") acquire`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; Kernel whose only operation is `fence syncscope("singlethread-one-as") release`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; Kernel whose only operation is `fence syncscope("singlethread-one-as") acq_rel`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; Kernel whose only operation is `fence syncscope("singlethread-one-as") seq_cst`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; Kernel whose only operation is `fence syncscope("agent-one-as") acquire`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, with
; `buffer_wbinvl1` on the very next line, then `s_endpgm`. The text
; ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; Kernel whose only operation is `fence syncscope("agent-one-as") release`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, then
; `s_endpgm`. The text ATOMIC_FENCE must not appear between %bb.0 and the
; s_waitcnt. `buffer_wbinvl1` is neither required nor forbidden here.
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; Kernel whose only operation is `fence syncscope("agent-one-as") acq_rel`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, later a
; line ending in `buffer_wbinvl1` (not required to be adjacent), then
; `s_endpgm`. ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; Kernel whose only operation is `fence syncscope("agent-one-as") seq_cst`.
; Expected output after %bb.0: a line ending in `s_waitcnt vmcnt(0)`, later a
; line ending in `buffer_wbinvl1` (not required to be adjacent), then
; `s_endpgm`. ATOMIC_FENCE must not appear between %bb.0 and the s_waitcnt.
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; Kernel whose only operation is `fence syncscope("workgroup-one-as") acquire`.
; Between %bb.0 and `s_endpgm` the output must contain neither a line ending
; in `s_waitcnt vmcnt(0)` nor the text ATOMIC_FENCE.
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; Kernel whose only operation is `fence syncscope("workgroup-one-as") release`.
; Between %bb.0 and `s_endpgm` the output must contain neither a line ending
; in `s_waitcnt vmcnt(0)` nor the text ATOMIC_FENCE.
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; Kernel whose only operation is `fence syncscope("workgroup-one-as") acq_rel`.
; Between %bb.0 and `s_endpgm` the output must contain neither a line ending
; in `s_waitcnt vmcnt(0)` nor the text ATOMIC_FENCE.
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; Kernel whose only operation is `fence syncscope("workgroup-one-as") seq_cst`.
; Between %bb.0 and `s_endpgm` the output must contain neither a line ending
; in `s_waitcnt vmcnt(0)` nor the text ATOMIC_FENCE.
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; Kernel whose only operation is `fence syncscope("wavefront-one-as") acquire`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; Kernel whose only operation is `fence syncscope("wavefront-one-as") release`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; Kernel whose only operation is `fence syncscope("wavefront-one-as") acq_rel`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; Kernel whose only operation is `fence syncscope("wavefront-one-as") seq_cst`.
; The only expectation is that ATOMIC_FENCE does not appear between %bb.0 and
; `s_endpgm`; no wait or cache instruction is checked for.
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
entry:
fence acquire
@ -18,7 +236,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@ -29,7 +247,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
@ -41,7 +259,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
@ -93,7 +311,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@ -105,7 +323,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@ -116,7 +334,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
@ -128,7 +346,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
@ -139,7 +357,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@ -150,7 +368,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@ -161,7 +379,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@ -172,7 +390,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {

View File

@ -55,7 +55,7 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952

View File

@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(2)* undef`)
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
---
@ -783,7 +783,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -805,7 +805,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -827,7 +827,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -893,7 +893,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -915,7 +915,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(2)* undef`)
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -937,7 +937,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -959,7 +959,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -981,7 +981,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -1003,7 +1003,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -1025,7 +1025,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@ -1047,7 +1047,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...

View File

@ -5,203 +5,203 @@
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_unordered:
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(
define amdgpu_kernel void @system_one_as_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out unordered, align 4
store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4
ret void
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(
define amdgpu_kernel void @system_one_as_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out monotonic, align 4
store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4
ret void
}
; GCN-LABEL: {{^}}system_release:
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(
define amdgpu_kernel void @system_one_as_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out release, align 4
store atomic i32 %in, i32* %out syncscope("one-as") release, align 4
ret void
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(
define amdgpu_kernel void @system_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out seq_cst, align 4
store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(
define amdgpu_kernel void @singlethread_one_as_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4
ret void
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4
ret void
}
; GCN-LABEL: {{^}}singlethread_release:
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(
define amdgpu_kernel void @singlethread_one_as_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4
ret void
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}agent_unordered:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(
define amdgpu_kernel void @agent_one_as_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4
ret void
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(
define amdgpu_kernel void @agent_one_as_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4
ret void
}
; GCN-LABEL: {{^}}agent_release:
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(
define amdgpu_kernel void @agent_one_as_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") release, align 4
store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4
ret void
}
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(
define amdgpu_kernel void @agent_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(
define amdgpu_kernel void @workgroup_one_as_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4
ret void
}
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4
ret void
}
; GCN-LABEL: {{^}}workgroup_release:
; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
define amdgpu_kernel void @workgroup_one_as_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4
ret void
}
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4
ret void
}
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(
define amdgpu_kernel void @wavefront_one_as_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4
ret void
}
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4
ret void
}
; GCN-LABEL: {{^}}wavefront_release:
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(
define amdgpu_kernel void @wavefront_one_as_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4
ret void
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4
ret void
}
@ -295,4 +295,204 @@ entry:
ret void
}
; Unordered atomic store with no explicit syncscope.
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out unordered, align 4
  ret void
}
; Monotonic atomic store with no explicit syncscope.
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out monotonic, align 4
  ret void
}
; Release atomic store with no explicit syncscope.
; Expects a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` with the
; flat_store_dword on the very next output line.
; GCN-LABEL: {{^}}system_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out release, align 4
  ret void
}
; Sequentially consistent atomic store with no explicit syncscope.
; Expects a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` with the
; flat_store_dword on the very next output line.
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out seq_cst, align 4
  ret void
}
; Unordered atomic store at syncscope("singlethread").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
  ret void
}
; Monotonic atomic store at syncscope("singlethread").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
  ret void
}
; Release atomic store at syncscope("singlethread").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
  ret void
}
; Sequentially consistent atomic store at syncscope("singlethread").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
  ret void
}
; Unordered atomic store at syncscope("agent").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
  ret void
}
; Monotonic atomic store at syncscope("agent").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
  ret void
}
; Release atomic store at syncscope("agent").
; Expects a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` with the
; flat_store_dword on the very next output line.
; GCN-LABEL: {{^}}agent_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("agent") release, align 4
  ret void
}
; Sequentially consistent atomic store at syncscope("agent").
; Expects a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` with the
; flat_store_dword on the very next output line.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
  ret void
}
; Unordered atomic store at syncscope("workgroup").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
  ret void
}
; Monotonic atomic store at syncscope("workgroup").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
  ret void
}
; GCN-LABEL: {{^}}workgroup_release:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
ret void
}
; Sequentially consistent atomic store at syncscope("workgroup").
; The negative check for `s_waitcnt vmcnt(0) lgkmcnt(0)` is written with the
; GFX89 prefix rather than the common one, so it only applies to runs that
; enable that prefix; the flat_store_dword is required for every run.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
  ret void
}
; Unordered atomic store at syncscope("wavefront").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
  ret void
}
; Monotonic atomic store at syncscope("wavefront").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
  ret void
}
; Release atomic store at syncscope("wavefront").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
  ret void
}
; Sequentially consistent atomic store at syncscope("wavefront").
; Expects no output line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between
; the function label and the flat_store_dword.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
  ret void
}
; Metadata node !0 holding the single constant i32 1.
; NOTE(review) no reference to !0 is visible in this part of the file; confirm
; where it is consumed before changing or removing it.
!0 = !{i32 1}