1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

AMDGPU: Add support for cross address space synchronization scopes

Differential Revision: https://reviews.llvm.org/D59517

llvm-svn: 356946
This commit is contained in:
Konstantin Zhuravlyov 2019-03-25 20:50:21 +00:00
parent a23410c78e
commit 5a27d2d078
19 changed files with 2683 additions and 481 deletions

View File

@ -323,62 +323,80 @@ is conservatively correct for OpenCL.
.. table:: AMDHSA LLVM Sync Scopes .. table:: AMDHSA LLVM Sync Scopes
:name: amdgpu-amdhsa-llvm-sync-scopes-table :name: amdgpu-amdhsa-llvm-sync-scopes-table
================ ========================================================== ======================= ===================================================
LLVM Sync Scope Description LLVM Sync Scope Description
================ ========================================================== ======================= ===================================================
*none* The default: ``system``. *none* The default: ``system``.
Synchronizes with, and participates in modification and Synchronizes with, and participates in modification
seq_cst total orderings with, other operations (except and seq_cst total orderings with, other operations
image operations) for all address spaces (except private, (except image operations) for all address spaces
or generic that accesses private) provided the other (except private, or generic that accesses private)
operation's sync scope is: provided the other operation's sync scope is:
- ``system``. - ``system``.
- ``agent`` and executed by a thread on the same agent. - ``agent`` and executed by a thread on the same
- ``workgroup`` and executed by a thread in the same agent.
workgroup. - ``workgroup`` and executed by a thread in the
- ``wavefront`` and executed by a thread in the same same workgroup.
wavefront. - ``wavefront`` and executed by a thread in the
same wavefront.
``agent`` Synchronizes with, and participates in modification and ``agent`` Synchronizes with, and participates in modification
seq_cst total orderings with, other operations (except and seq_cst total orderings with, other operations
image operations) for all address spaces (except private, (except image operations) for all address spaces
or generic that accesses private) provided the other (except private, or generic that accesses private)
operation's sync scope is: provided the other operation's sync scope is:
- ``system`` or ``agent`` and executed by a thread on the - ``system`` or ``agent`` and executed by a thread
same agent. on the same agent.
- ``workgroup`` and executed by a thread in the same - ``workgroup`` and executed by a thread in the
workgroup. same workgroup.
- ``wavefront`` and executed by a thread in the same - ``wavefront`` and executed by a thread in the
wavefront. same wavefront.
``workgroup`` Synchronizes with, and participates in modification and ``workgroup`` Synchronizes with, and participates in modification
seq_cst total orderings with, other operations (except and seq_cst total orderings with, other operations
image operations) for all address spaces (except private, (except image operations) for all address spaces
or generic that accesses private) provided the other (except private, or generic that accesses private)
operation's sync scope is: provided the other operation's sync scope is:
- ``system``, ``agent`` or ``workgroup`` and executed by a - ``system``, ``agent`` or ``workgroup`` and
thread in the same workgroup. executed by a thread in the same workgroup.
- ``wavefront`` and executed by a thread in the same - ``wavefront`` and executed by a thread in the
wavefront. same wavefront.
``wavefront`` Synchronizes with, and participates in modification and ``wavefront`` Synchronizes with, and participates in modification
seq_cst total orderings with, other operations (except and seq_cst total orderings with, other operations
image operations) for all address spaces (except private, (except image operations) for all address spaces
or generic that accesses private) provided the other (except private, or generic that accesses private)
operation's sync scope is: provided the other operation's sync scope is:
- ``system``, ``agent``, ``workgroup`` or ``wavefront`` - ``system``, ``agent``, ``workgroup`` or
and executed by a thread in the same wavefront. ``wavefront`` and executed by a thread in the
same wavefront.
``singlethread`` Only synchronizes with, and participates in modification ``singlethread`` Only synchronizes with, and participates in
and seq_cst total orderings with, other operations (except modification and seq_cst total orderings with,
image operations) running in the same thread for all other operations (except image operations) running
address spaces (for example, in signal handlers). in the same thread for all address spaces (for
================ ========================================================== example, in signal handlers).
``one-as`` Same as ``system`` but only synchronizes with other
operations within the same address space.
``agent-one-as`` Same as ``agent`` but only synchronizes with other
operations within the same address space.
``workgroup-one-as`` Same as ``workgroup`` but only synchronizes with
other operations within the same address space.
``wavefront-one-as`` Same as ``wavefront`` but only synchronizes with
other operations within the same address space.
``singlethread-one-as`` Same as ``singlethread`` but only synchronizes with
other operations within the same address space.
======================= ===================================================
AMDGPU Intrinsics AMDGPU Intrinsics
----------------- -----------------

View File

@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
AgentSSID = CTX.getOrInsertSyncScopeID("agent"); AgentSSID = CTX.getOrInsertSyncScopeID("agent");
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup"); WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront"); WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
SystemOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("one-as");
AgentOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("agent-one-as");
WorkgroupOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("workgroup-one-as");
WavefrontOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("wavefront-one-as");
SingleThreadOneAddressSpaceSSID =
CTX.getOrInsertSyncScopeID("singlethread-one-as");
} }
} // end namespace llvm } // end namespace llvm

View File

@ -29,12 +29,22 @@ private:
// All supported memory/synchronization scopes can be found here: // All supported memory/synchronization scopes can be found here:
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes // http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
/// Agent synchronization scope ID. /// Agent synchronization scope ID (cross address space).
SyncScope::ID AgentSSID; SyncScope::ID AgentSSID;
/// Workgroup synchronization scope ID. /// Workgroup synchronization scope ID (cross address space).
SyncScope::ID WorkgroupSSID; SyncScope::ID WorkgroupSSID;
/// Wavefront synchronization scope ID. /// Wavefront synchronization scope ID (cross address space).
SyncScope::ID WavefrontSSID; SyncScope::ID WavefrontSSID;
/// System synchronization scope ID (single address space).
SyncScope::ID SystemOneAddressSpaceSSID;
/// Agent synchronization scope ID (single address space).
SyncScope::ID AgentOneAddressSpaceSSID;
/// Workgroup synchronization scope ID (single address space).
SyncScope::ID WorkgroupOneAddressSpaceSSID;
/// Wavefront synchronization scope ID (single address space).
SyncScope::ID WavefrontOneAddressSpaceSSID;
/// Single thread synchronization scope ID (single address space).
SyncScope::ID SingleThreadOneAddressSpaceSSID;
/// In AMDGPU target synchronization scopes are inclusive, meaning a /// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization /// larger synchronization scope is inclusive of a smaller synchronization
@ -43,35 +53,70 @@ private:
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not /// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
/// supported by the AMDGPU target. /// supported by the AMDGPU target.
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const { Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
if (SSID == SyncScope::SingleThread) if (SSID == SyncScope::SingleThread ||
SSID == getSingleThreadOneAddressSpaceSSID())
return 0; return 0;
else if (SSID == getWavefrontSSID()) else if (SSID == getWavefrontSSID() ||
SSID == getWavefrontOneAddressSpaceSSID())
return 1; return 1;
else if (SSID == getWorkgroupSSID()) else if (SSID == getWorkgroupSSID() ||
SSID == getWorkgroupOneAddressSpaceSSID())
return 2; return 2;
else if (SSID == getAgentSSID()) else if (SSID == getAgentSSID() ||
SSID == getAgentOneAddressSpaceSSID())
return 3; return 3;
else if (SSID == SyncScope::System) else if (SSID == SyncScope::System ||
SSID == getSystemOneAddressSpaceSSID())
return 4; return 4;
return None; return None;
} }
/// Tests whether \p SSID is one of the synchronization scopes that is
/// restricted to a single address space.
///
/// \returns true if \p SSID equals any of the single-thread, wavefront,
/// workgroup, agent or system one-address-space scope IDs; false otherwise.
bool isOneAddressSpace(SyncScope::ID SSID) const {
  const SyncScope::ID OneAddressSpaceIDs[] = {
      getSingleThreadOneAddressSpaceSSID(),
      getWavefrontOneAddressSpaceSSID(),
      getWorkgroupOneAddressSpaceSSID(),
      getAgentOneAddressSpaceSSID(),
      getSystemOneAddressSpaceSSID(),
  };
  for (SyncScope::ID Candidate : OneAddressSpaceIDs)
    if (SSID == Candidate)
      return true;
  return false;
}
public: public:
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI); AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
/// \returns Agent synchronization scope ID. /// \returns Agent synchronization scope ID (cross address space).
SyncScope::ID getAgentSSID() const { SyncScope::ID getAgentSSID() const {
return AgentSSID; return AgentSSID;
} }
/// \returns Workgroup synchronization scope ID. /// \returns Workgroup synchronization scope ID (cross address space).
SyncScope::ID getWorkgroupSSID() const { SyncScope::ID getWorkgroupSSID() const {
return WorkgroupSSID; return WorkgroupSSID;
} }
/// \returns Wavefront synchronization scope ID. /// \returns Wavefront synchronization scope ID (cross address space).
SyncScope::ID getWavefrontSSID() const { SyncScope::ID getWavefrontSSID() const {
return WavefrontSSID; return WavefrontSSID;
} }
/// Accessor for the system synchronization scope ID that is limited to a
/// single address space.
/// \returns the stored SystemOneAddressSpaceSSID value.
SyncScope::ID getSystemOneAddressSpaceSSID() const {
  return this->SystemOneAddressSpaceSSID;
}
/// Accessor for the agent synchronization scope ID that is limited to a
/// single address space.
/// \returns the stored AgentOneAddressSpaceSSID value.
SyncScope::ID getAgentOneAddressSpaceSSID() const {
  return this->AgentOneAddressSpaceSSID;
}
/// Accessor for the workgroup synchronization scope ID that is limited to a
/// single address space.
/// \returns the stored WorkgroupOneAddressSpaceSSID value.
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
  return this->WorkgroupOneAddressSpaceSSID;
}
/// Accessor for the wavefront synchronization scope ID that is limited to a
/// single address space.
/// \returns the stored WavefrontOneAddressSpaceSSID value.
SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
  return this->WavefrontOneAddressSpaceSSID;
}
/// Accessor for the single-thread synchronization scope ID that is limited
/// to a single address space.
/// \returns the stored SingleThreadOneAddressSpaceSSID value.
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
  return this->SingleThreadOneAddressSpaceSSID;
}
/// In AMDGPU target synchronization scopes are inclusive, meaning a /// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization /// larger synchronization scope is inclusive of a smaller synchronization
@ -87,7 +132,11 @@ public:
if (!AIO || !BIO) if (!AIO || !BIO)
return None; return None;
return AIO.getValue() > BIO.getValue(); bool IsAOneAddressSpace = isOneAddressSpace(A);
bool IsBOneAddressSpace = isOneAddressSpace(B);
return AIO.getValue() >= BIO.getValue() &&
(IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
} }
}; };

View File

@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>> Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID, SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const { SIAtomicAddrSpace InstrScope) const {
/// TODO: For now assume OpenCL memory model which treats each if (SSID == SyncScope::System)
/// address space as having a separate happens-before relation, and return std::make_tuple(SIAtomicScope::SYSTEM,
/// so an instruction only has ordering with respect to the address SIAtomicAddrSpace::ATOMIC,
/// space it accesses, and if it accesses multiple address spaces it true);
/// does not require ordering of operations in different address if (SSID == MMI->getAgentSSID())
/// spaces. return std::make_tuple(SIAtomicScope::AGENT,
if (SSID == SyncScope::System) SIAtomicAddrSpace::ATOMIC,
true);
if (SSID == MMI->getWorkgroupSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC,
true);
if (SSID == MMI->getWavefrontSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC,
true);
if (SSID == SyncScope::SingleThread)
return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC,
true);
if (SSID == MMI->getSystemOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SYSTEM, return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC & InstrScope, SIAtomicAddrSpace::ATOMIC & InstrScope,
false); false);
if (SSID == MMI->getAgentSSID()) if (SSID == MMI->getAgentOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::AGENT, return std::make_tuple(SIAtomicScope::AGENT,
SIAtomicAddrSpace::ATOMIC & InstrScope, SIAtomicAddrSpace::ATOMIC & InstrScope,
false); false);
if (SSID == MMI->getWorkgroupSSID()) if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP, return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC & InstrScope, SIAtomicAddrSpace::ATOMIC & InstrScope,
false); false);
if (SSID == MMI->getWavefrontSSID()) if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT, return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC & InstrScope, SIAtomicAddrSpace::ATOMIC & InstrScope,
false); false);
if (SSID == SyncScope::SingleThread) if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SINGLETHREAD, return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC & InstrScope, SIAtomicAddrSpace::ATOMIC & InstrScope,
false); false);
/// TODO: To support HSA Memory Model need to add additional memory
/// scopes that specify that do require cross address space
/// ordering.
return None; return None;
} }
@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VMCnt = false; bool VMCnt = false;
bool LGKMCnt = false; bool LGKMCnt = false;
bool EXPCnt = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
switch (Scope) { switch (Scope) {
case SIAtomicScope::SYSTEM: case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT: case SIAtomicScope::AGENT:
VMCnt = true; VMCnt |= true;
break; break;
case SIAtomicScope::WORKGROUP: case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT: case SIAtomicScope::WAVEFRONT:
@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/GDS memory as LDS operations // also synchronizing with global/GDS memory as LDS operations
// could be reordered with respect to later global/GDS memory // could be reordered with respect to later global/GDS memory
// operations of the same wave. // operations of the same wave.
LGKMCnt = IsCrossAddrSpaceOrdering; LGKMCnt |= IsCrossAddrSpaceOrdering;
break; break;
case SIAtomicScope::WAVEFRONT: case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD: case SIAtomicScope::SINGLETHREAD:
@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/LDS memory as GDS operations // also synchronizing with global/LDS memory as GDS operations
// could be reordered with respect to later global/LDS memory // could be reordered with respect to later global/LDS memory
// operations of the same wave. // operations of the same wave.
EXPCnt = IsCrossAddrSpaceOrdering; LGKMCnt |= IsCrossAddrSpaceOrdering;
break; break;
case SIAtomicScope::WORKGROUP: case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT: case SIAtomicScope::WAVEFRONT:
@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
} }
} }
if (VMCnt || LGKMCnt || EXPCnt) { if (VMCnt || LGKMCnt) {
unsigned WaitCntImmediate = unsigned WaitCntImmediate =
AMDGPU::encodeWaitcnt(IV, AMDGPU::encodeWaitcnt(IV,
VMCnt ? 0 : getVmcntBitMask(IV), VMCnt ? 0 : getVmcntBitMask(IV),
EXPCnt ? 0 : getExpcntBitMask(IV), getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV)); LGKMCnt ? 0 : getLgkmcntBitMask(IV));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate); BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
Changed = true; Changed = true;

View File

@ -12,8 +12,10 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v1, v2 ; GCN-NEXT: v_not_b32_e32 v1, v2
; GCN-NEXT: v_or_b32_e32 v1, -5, v1 ; GCN-NEXT: v_or_b32_e32 v1, -5, v1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 ; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1 ; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] ; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]

View File

@ -27,9 +27,9 @@ bb:
%tmp1 = zext i32 %tmp to i64 %tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp %tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp
%tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4 %tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4
fence syncscope("workgroup") release fence syncscope("workgroup-one-as") release
tail call void @llvm.amdgcn.s.barrier() tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire fence syncscope("workgroup-one-as") acquire
%tmp4 = add nsw i32 %tmp3, %tmp3 %tmp4 = add nsw i32 %tmp3, %tmp3
%tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false) %tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
%tmp6 = add nsw i32 %tmp5, %tmp4 %tmp6 = add nsw i32 %tmp5, %tmp4

View File

@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000 ; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32 ; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64 ; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1) ; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]] ; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) { define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4 %idx.add = add nuw i32 %idx, 4
@ -49,6 +49,27 @@ define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace
ret void ret void
} }
; Three seq_cst atomicrmw fadd operations on addrspace(3) pointers, all using
; syncscope("one-as"). The third one consumes %a1; the checks below expect an
; s_waitcnt lgkmcnt(1) ahead of that dependent ds_add_rtn_f32.
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
%a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
%a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
%a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 syncscope("one-as") seq_cst
store float %a3, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64: ; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64 ; GCN: ds_read_b64
; GCN: v_add_f64 ; GCN: v_add_f64

View File

@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}system_acquire: ; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}} ; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() { define amdgpu_kernel void @system_acquire() {
@ -18,7 +18,7 @@ entry:
; FUNC-LABEL: {{^}}system_release: ; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_release() { define amdgpu_kernel void @system_release() {
entry: entry:
@ -29,7 +29,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel: ; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() { define amdgpu_kernel void @system_acq_rel() {
@ -41,7 +41,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst: ; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() { define amdgpu_kernel void @system_seq_cst() {
@ -50,6 +50,53 @@ entry:
ret void ret void
} }
; Acquire fence at the "one-as" sync scope. Expected output: an s_waitcnt that
; names only vmcnt(0) (line ends right after it), directly followed by
; buffer_wbinvl1, and no ATOMIC_FENCE text before that wait.
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; Release fence at the "one-as" sync scope. Expected output: an s_waitcnt that
; names only vmcnt(0), with no ATOMIC_FENCE text before it.
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; Acquire-release fence at the "one-as" sync scope. Expected output: an
; s_waitcnt that names only vmcnt(0), later a buffer_wbinvl1, and no
; ATOMIC_FENCE text before the wait.
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; Sequentially consistent fence at the "one-as" sync scope. Expected output:
; an s_waitcnt that names only vmcnt(0), later a buffer_wbinvl1, and no
; ATOMIC_FENCE text before the wait.
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}singlethread_acquire: ; FUNC-LABEL: {{^}}singlethread_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
@ -90,10 +137,50 @@ entry:
ret void ret void
} }
; Acquire fence at the "singlethread-one-as" sync scope. The checks only
; require that no ATOMIC_FENCE text appears between the block label and
; s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; Release fence at the "singlethread-one-as" sync scope. The checks only
; require that no ATOMIC_FENCE text appears between the block label and
; s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; Acquire-release fence at the "singlethread-one-as" sync scope. The checks
; only require that no ATOMIC_FENCE text appears between the block label and
; s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; Sequentially consistent fence at the "singlethread-one-as" sync scope. The
; checks only require that no ATOMIC_FENCE text appears between the block
; label and s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}agent_acquire: ; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}} ; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() { define amdgpu_kernel void @agent_acquire() {
@ -105,7 +192,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release: ; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_release() { define amdgpu_kernel void @agent_release() {
entry: entry:
@ -116,7 +203,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel: ; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() { define amdgpu_kernel void @agent_acq_rel() {
@ -128,7 +215,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst: ; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() { define amdgpu_kernel void @agent_seq_cst() {
@ -137,9 +224,56 @@ entry:
ret void ret void
} }
; Acquire fence at the "agent-one-as" sync scope. Expected output: an
; s_waitcnt that names only vmcnt(0), directly followed by buffer_wbinvl1, and
; no ATOMIC_FENCE text before that wait.
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; Release fence at the "agent-one-as" sync scope. Expected output: an
; s_waitcnt that names only vmcnt(0), with no ATOMIC_FENCE text before it.
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; Acquire-release fence at the "agent-one-as" sync scope. Expected output: an
; s_waitcnt that names only vmcnt(0), later a buffer_wbinvl1, and no
; ATOMIC_FENCE text before the wait.
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; Sequentially consistent fence at the "agent-one-as" sync scope. Expected
; output: an s_waitcnt that names only vmcnt(0), later a buffer_wbinvl1, and
; no ATOMIC_FENCE text before the wait.
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_acquire: ; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() { define amdgpu_kernel void @workgroup_acquire() {
@ -150,7 +284,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release: ; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() { define amdgpu_kernel void @workgroup_release() {
@ -161,7 +295,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel: ; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() { define amdgpu_kernel void @workgroup_acq_rel() {
@ -172,7 +306,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst: ; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() { define amdgpu_kernel void @workgroup_seq_cst() {
@ -181,6 +315,50 @@ entry:
ret void ret void
} }
; Acquire fence at the "workgroup-one-as" sync scope. The checks require that
; neither a vmcnt(0)-only s_waitcnt nor any ATOMIC_FENCE text appears between
; the block label and s_endpgm.
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; Release fence at the "workgroup-one-as" sync scope. The checks require that
; neither a vmcnt(0)-only s_waitcnt nor any ATOMIC_FENCE text appears between
; the block label and s_endpgm.
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; Acquire-release fence at the "workgroup-one-as" sync scope. The checks
; require that neither a vmcnt(0)-only s_waitcnt nor any ATOMIC_FENCE text
; appears between the block label and s_endpgm.
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; Sequentially consistent fence at the "workgroup-one-as" sync scope. The
; checks require that neither a vmcnt(0)-only s_waitcnt nor any ATOMIC_FENCE
; text appears between the block label and s_endpgm.
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}wavefront_acquire: ; FUNC-LABEL: {{^}}wavefront_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
@ -220,3 +398,43 @@ entry:
fence syncscope("wavefront") seq_cst fence syncscope("wavefront") seq_cst
ret void ret void
} }
; Acquire fence at the "wavefront-one-as" sync scope. The checks only require
; that no ATOMIC_FENCE text appears between the block label and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; Release fence at the "wavefront-one-as" sync scope. The checks only require
; that no ATOMIC_FENCE text appears between the block label and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; Acquire-release fence at the "wavefront-one-as" sync scope. The checks only
; require that no ATOMIC_FENCE text appears between the block label and
; s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; Sequentially consistent fence at the "wavefront-one-as" sync scope. The
; checks only require that no ATOMIC_FENCE text appears between the block
; label and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; FUNC-LABEL: {{^}}system_acquire: ; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}} ; GFX6: s_waitcnt vmcnt(0){{$}}
@ -10,6 +10,232 @@
; GFX8: s_waitcnt vmcnt(0){{$}} ; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}} ; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
; Kernel whose body is a single acquire fence with syncscope("one-as").
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; Release fence, syncscope "one-as": a line-ending 's_waitcnt vmcnt(0)' must be
; emitted before s_endpgm, with no ATOMIC_FENCE between %bb.0 and that wait.
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; Acq_rel fence, syncscope "one-as": a line-ending 's_waitcnt vmcnt(0)' must be
; followed by buffer_wbinvl1 (GFX6 prefix) or buffer_wbinvl1_vol (GFX8 prefix)
; before s_endpgm; no ATOMIC_FENCE may precede the wait.
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; Seq_cst fence, syncscope "one-as": a line-ending 's_waitcnt vmcnt(0)' must be
; followed by buffer_wbinvl1 (GFX6 prefix) or buffer_wbinvl1_vol (GFX8 prefix)
; before s_endpgm; no ATOMIC_FENCE may precede the wait.
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; Acquire fence, syncscope "singlethread-one-as": the only requirement is that
; no ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; Release fence, syncscope "singlethread-one-as": the only requirement is that
; no ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; Acq_rel fence, syncscope "singlethread-one-as": the only requirement is that
; no ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; Seq_cst fence, syncscope "singlethread-one-as": the only requirement is that
; no ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; Acquire fence, syncscope "agent-one-as": a line-ending 's_waitcnt vmcnt(0)'
; must be immediately followed by buffer_wbinvl1 (GFX6 prefix) or
; buffer_wbinvl1_vol (GFX8 prefix); no ATOMIC_FENCE may precede it.
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; Release fence, syncscope "agent-one-as": a line-ending 's_waitcnt vmcnt(0)'
; must be emitted before s_endpgm, with no ATOMIC_FENCE between %bb.0 and it.
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; Acq_rel fence, syncscope "agent-one-as": a line-ending 's_waitcnt vmcnt(0)'
; must be followed by buffer_wbinvl1 (GFX6 prefix) or buffer_wbinvl1_vol
; (GFX8 prefix) before s_endpgm; no ATOMIC_FENCE may precede the wait.
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; Seq_cst fence, syncscope "agent-one-as": a line-ending 's_waitcnt vmcnt(0)'
; must be followed by buffer_wbinvl1 (GFX6 prefix) or buffer_wbinvl1_vol
; (GFX8 prefix) before s_endpgm; no ATOMIC_FENCE may precede the wait.
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; Acquire fence, syncscope "workgroup-one-as": between %bb.0 and s_endpgm there
; must be no ATOMIC_FENCE and (GFX68 prefix) no line-ending 's_waitcnt vmcnt(0)'.
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; Release fence, syncscope "workgroup-one-as": between %bb.0 and s_endpgm there
; must be no ATOMIC_FENCE and (GFX68 prefix) no line-ending 's_waitcnt vmcnt(0)'.
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; Acq_rel fence, syncscope "workgroup-one-as": between %bb.0 and s_endpgm there
; must be no ATOMIC_FENCE and (GFX68 prefix) no line-ending 's_waitcnt vmcnt(0)'.
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; Seq_cst fence, syncscope "workgroup-one-as": between %bb.0 and s_endpgm there
; must be no ATOMIC_FENCE and (GFX68 prefix) no line-ending 's_waitcnt vmcnt(0)'.
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; Acquire fence, syncscope "wavefront-one-as": the only requirement is that no
; ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; Release fence, syncscope "wavefront-one-as": the only requirement is that no
; ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; Acq_rel fence, syncscope "wavefront-one-as": the only requirement is that no
; ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; Seq_cst fence, syncscope "wavefront-one-as": the only requirement is that no
; ATOMIC_FENCE appears between the %bb.0 marker and s_endpgm.
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() { define amdgpu_kernel void @system_acquire() {
entry: entry:
fence acquire fence acquire
@ -19,7 +245,7 @@ entry:
; FUNC-LABEL: {{^}}system_release: ; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_release() { define amdgpu_kernel void @system_release() {
entry: entry:
@ -30,7 +256,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel: ; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}} ; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}} ; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
@ -43,7 +269,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst: ; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}} ; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}} ; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
@ -96,9 +322,9 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire: ; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}} ; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}} ; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}} ; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}} ; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() { define amdgpu_kernel void @agent_acquire() {
@ -110,7 +336,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release: ; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_release() { define amdgpu_kernel void @agent_release() {
entry: entry:
@ -121,7 +347,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel: ; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}} ; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}} ; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
@ -134,7 +360,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst: ; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}} ; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}} ; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
@ -146,7 +372,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire: ; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}} ; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() { define amdgpu_kernel void @workgroup_acquire() {
@ -157,7 +383,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release: ; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0 ; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}} ; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() { define amdgpu_kernel void @workgroup_release() {
@ -168,7 +394,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel: ; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}} ; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() { define amdgpu_kernel void @workgroup_acq_rel() {
@ -179,7 +405,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst: ; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}} ; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() { define amdgpu_kernel void @workgroup_seq_cst() {

View File

@ -104,7 +104,7 @@ body: |
S_WAITCNT 127 S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
S_WAITCNT 3952 S_WAITCNT 3952
BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep) BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from %ir.gep)
bb.2.exit: bb.2.exit:
liveins: $sgpr2_sgpr3 liveins: $sgpr2_sgpr3

View File

@ -1,11 +1,311 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: {{^}}system_monotonic: ; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
; Kernel performing one volatile atomicrmw xchg of %in into %out with
; syncscope("one-as") and monotonic ordering; the returned %val is unused.
define amdgpu_kernel void @system_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
ret void
}
; Acquire xchg, syncscope "one-as": no line-ending 's_waitcnt vmcnt(0)' before
; the flat_atomic_swap; directly after it come that wait and then (GFX8 prefix)
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
ret void
}
; Release xchg, syncscope "one-as": a line-ending 's_waitcnt vmcnt(0)' sits
; directly before the flat_atomic_swap; after the swap neither that wait nor
; buffer_wbinvl1_vol may appear.
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
ret void
}
; Acq_rel xchg, syncscope "one-as": the flat_atomic_swap is bracketed by
; line-ending 's_waitcnt vmcnt(0)' on the lines directly before and after it,
; then (GFX8 prefix) buffer_wbinvl1_vol follows.
; GCN-LABEL: {{^}}system_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
ret void
}
; Seq_cst xchg, syncscope "one-as": the flat_atomic_swap is bracketed by
; line-ending 's_waitcnt vmcnt(0)' on the lines directly before and after it,
; then (GFX8 prefix) buffer_wbinvl1_vol follows.
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
ret void
}
; Monotonic xchg, syncscope "singlethread-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
ret void
}
; Acquire xchg, syncscope "singlethread-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
ret void
}
; Release xchg, syncscope "singlethread-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
ret void
}
; Acq_rel xchg, syncscope "singlethread-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
ret void
}
; Seq_cst xchg, syncscope "singlethread-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
ret void
}
; Monotonic xchg, syncscope "agent-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
ret void
}
; Acquire xchg, syncscope "agent-one-as": no line-ending 's_waitcnt vmcnt(0)'
; before the flat_atomic_swap; directly after it come that wait and then
; (GFX8 prefix) buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
ret void
}
; Release xchg, syncscope "agent-one-as": a line-ending 's_waitcnt vmcnt(0)'
; sits directly before the flat_atomic_swap; after the swap neither that wait
; nor buffer_wbinvl1_vol may appear.
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
ret void
}
; Acq_rel xchg, syncscope "agent-one-as": the flat_atomic_swap is bracketed by
; line-ending 's_waitcnt vmcnt(0)' on the lines directly before and after it,
; then (GFX8 prefix) buffer_wbinvl1_vol follows.
; GCN-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
ret void
}
; Seq_cst xchg, syncscope "agent-one-as": the flat_atomic_swap is bracketed by
; line-ending 's_waitcnt vmcnt(0)' on the lines directly before and after it,
; then (GFX8 prefix) buffer_wbinvl1_vol follows.
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
ret void
}
; Monotonic xchg, syncscope "workgroup-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
ret void
}
; Acquire xchg, syncscope "workgroup-one-as": no line-ending
; 's_waitcnt vmcnt(0)' before the flat_atomic_swap, and (GFX8 prefix) neither
; that wait nor buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
ret void
}
; Release xchg, syncscope "workgroup-one-as": (GFX8 prefix) no line-ending
; 's_waitcnt vmcnt(0)' before the flat_atomic_swap; after it neither that wait
; nor buffer_wbinvl1_vol may appear.
; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
ret void
}
; Acq_rel xchg, syncscope "workgroup-one-as": under the GFX8 prefix, no
; line-ending 's_waitcnt vmcnt(0)' on either side of the flat_atomic_swap and
; no buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
ret void
}
; Seq_cst xchg, syncscope "workgroup-one-as": (GFX8 prefix) no line-ending
; 's_waitcnt vmcnt(0)' before the flat_atomic_swap; after it no such wait and
; (GFX8 prefix) no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
ret void
}
; Monotonic xchg, syncscope "wavefront-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
ret void
}
; Acquire xchg, syncscope "wavefront-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
ret void
}
; Release xchg, syncscope "wavefront-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_release(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
ret void
}
; Acq_rel xchg, syncscope "wavefront-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
ret void
}
; Seq_cst xchg, syncscope "wavefront-one-as": only the flat_atomic_swap is
; expected; no line-ending 's_waitcnt vmcnt(0)' on either side of it and no
; buffer_wbinvl1_vol after it.
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
ret void
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic( define amdgpu_kernel void @system_monotonic(
i32* %out, i32 %in) { i32* %out, i32 %in) {
entry: entry:
@ -14,9 +314,9 @@ entry:
} }
; GCN-LABEL: {{^}}system_acquire: ; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire( define amdgpu_kernel void @system_acquire(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -26,9 +326,9 @@ entry:
} }
; GCN-LABEL: {{^}}system_release: ; GCN-LABEL: {{^}}system_release:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release( define amdgpu_kernel void @system_release(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -38,9 +338,9 @@ entry:
} }
; GCN-LABEL: {{^}}system_acq_rel: ; GCN-LABEL: {{^}}system_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel( define amdgpu_kernel void @system_acq_rel(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -50,9 +350,9 @@ entry:
} }
; GCN-LABEL: {{^}}system_seq_cst: ; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst( define amdgpu_kernel void @system_seq_cst(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -62,9 +362,9 @@ entry:
} }
; GCN-LABEL: {{^}}singlethread_monotonic: ; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic( define amdgpu_kernel void @singlethread_monotonic(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -74,9 +374,9 @@ entry:
} }
; GCN-LABEL: {{^}}singlethread_acquire: ; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire( define amdgpu_kernel void @singlethread_acquire(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -86,9 +386,9 @@ entry:
} }
; GCN-LABEL: {{^}}singlethread_release: ; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release( define amdgpu_kernel void @singlethread_release(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -98,9 +398,9 @@ entry:
} }
; GCN-LABEL: {{^}}singlethread_acq_rel: ; GCN-LABEL: {{^}}singlethread_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel( define amdgpu_kernel void @singlethread_acq_rel(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -110,9 +410,9 @@ entry:
} }
; GCN-LABEL: {{^}}singlethread_seq_cst: ; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst( define amdgpu_kernel void @singlethread_seq_cst(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -122,9 +422,9 @@ entry:
} }
; GCN-LABEL: {{^}}agent_monotonic: ; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic( define amdgpu_kernel void @agent_monotonic(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -134,9 +434,9 @@ entry:
} }
; GCN-LABEL: {{^}}agent_acquire: ; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire( define amdgpu_kernel void @agent_acquire(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -146,9 +446,9 @@ entry:
} }
; GCN-LABEL: {{^}}agent_release: ; GCN-LABEL: {{^}}agent_release:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release( define amdgpu_kernel void @agent_release(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -158,9 +458,9 @@ entry:
} }
; GCN-LABEL: {{^}}agent_acq_rel: ; GCN-LABEL: {{^}}agent_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel( define amdgpu_kernel void @agent_acq_rel(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -170,9 +470,9 @@ entry:
} }
; GCN-LABEL: {{^}}agent_seq_cst: ; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst( define amdgpu_kernel void @agent_seq_cst(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -182,9 +482,9 @@ entry:
} }
; GCN-LABEL: {{^}}workgroup_monotonic: ; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic( define amdgpu_kernel void @workgroup_monotonic(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -194,9 +494,9 @@ entry:
} }
; GCN-LABEL: {{^}}workgroup_acquire: ; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}} ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol ; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire( define amdgpu_kernel void @workgroup_acquire(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -206,9 +506,9 @@ entry:
} }
; GCN-LABEL: {{^}}workgroup_release: ; GCN-LABEL: {{^}}workgroup_release:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}} ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release( define amdgpu_kernel void @workgroup_release(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -218,9 +518,9 @@ entry:
} }
; GCN-LABEL: {{^}}workgroup_acq_rel: ; GCN-LABEL: {{^}}workgroup_acq_rel:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}} ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX8-NOT: s_waitcnt vmcnt(0){{$}} ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol ; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel( define amdgpu_kernel void @workgroup_acq_rel(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -230,9 +530,9 @@ entry:
} }
; GCN-LABEL: {{^}}workgroup_seq_cst: ; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX8-NOT: s_waitcnt vmcnt(0){{$}} ; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol ; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst( define amdgpu_kernel void @workgroup_seq_cst(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -242,9 +542,9 @@ entry:
} }
; GCN-LABEL: {{^}}wavefront_monotonic: ; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic( define amdgpu_kernel void @wavefront_monotonic(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -254,9 +554,9 @@ entry:
} }
; GCN-LABEL: {{^}}wavefront_acquire: ; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire( define amdgpu_kernel void @wavefront_acquire(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -266,9 +566,9 @@ entry:
} }
; GCN-LABEL: {{^}}wavefront_release: ; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release( define amdgpu_kernel void @wavefront_release(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -278,9 +578,9 @@ entry:
} }
; GCN-LABEL: {{^}}wavefront_acq_rel: ; GCN-LABEL: {{^}}wavefront_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel( define amdgpu_kernel void @wavefront_acq_rel(
i32* %out, i32 %in) { i32* %out, i32 %in) {
@ -290,9 +590,9 @@ entry:
} }
; GCN-LABEL: {{^}}wavefront_seq_cst: ; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol ; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst( define amdgpu_kernel void @wavefront_seq_cst(
i32* %out, i32 %in) { i32* %out, i32 %in) {

View File

@ -11,7 +11,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load seq_cst 4 from `i32 addrspace(42)* undef`) renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -47,7 +47,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`) FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -63,7 +63,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`) FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...

View File

@ -5,282 +5,282 @@
declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_unordered: ; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered( define amdgpu_kernel void @system_one_as_unordered(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in unordered, align 4 %val = load atomic i32, i32* %in syncscope("one-as") unordered, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}system_monotonic: ; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic( define amdgpu_kernel void @system_one_as_monotonic(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in monotonic, align 4 %val = load atomic i32, i32* %in syncscope("one-as") monotonic, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}system_acquire: ; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol ; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire( define amdgpu_kernel void @system_one_as_acquire(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in acquire, align 4 %val = load atomic i32, i32* %in syncscope("one-as") acquire, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}system_seq_cst: ; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol ; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst( define amdgpu_kernel void @system_one_as_seq_cst(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in seq_cst, align 4 %val = load atomic i32, i32* %in syncscope("one-as") seq_cst, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_unordered: ; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered( define amdgpu_kernel void @singlethread_one_as_unordered(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4 %val = load atomic i32, i32* %in syncscope("singlethread-one-as") unordered, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_monotonic: ; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic( define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4 %val = load atomic i32, i32* %in syncscope("singlethread-one-as") monotonic, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_acquire: ; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire( define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4 %val = load atomic i32, i32* %in syncscope("singlethread-one-as") acquire, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_seq_cst: ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst( define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4 %val = load atomic i32, i32* %in syncscope("singlethread-one-as") seq_cst, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}agent_unordered: ; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered( define amdgpu_kernel void @agent_one_as_unordered(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4 %val = load atomic i32, i32* %in syncscope("agent-one-as") unordered, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}agent_monotonic: ; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic( define amdgpu_kernel void @agent_one_as_monotonic(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4 %val = load atomic i32, i32* %in syncscope("agent-one-as") monotonic, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}agent_acquire: ; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol ; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire( define amdgpu_kernel void @agent_one_as_acquire(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("agent") acquire, align 4 %val = load atomic i32, i32* %in syncscope("agent-one-as") acquire, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}agent_seq_cst: ; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol ; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst( define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4 %val = load atomic i32, i32* %in syncscope("agent-one-as") seq_cst, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_unordered: ; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered( define amdgpu_kernel void @workgroup_one_as_unordered(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4 %val = load atomic i32, i32* %in syncscope("workgroup-one-as") unordered, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_monotonic: ; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic( define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4 %val = load atomic i32, i32* %in syncscope("workgroup-one-as") monotonic, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_acquire: ; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}} ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire( define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4 %val = load atomic i32, i32* %in syncscope("workgroup-one-as") acquire, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_seq_cst: ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}} ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}} ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst( define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4 %val = load atomic i32, i32* %in syncscope("workgroup-one-as") seq_cst, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_unordered: ; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered( define amdgpu_kernel void @wavefront_one_as_unordered(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4 %val = load atomic i32, i32* %in syncscope("wavefront-one-as") unordered, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_monotonic: ; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic( define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4 %val = load atomic i32, i32* %in syncscope("wavefront-one-as") monotonic, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_acquire: ; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire( define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4 %val = load atomic i32, i32* %in syncscope("wavefront-one-as") acquire, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_seq_cst: ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol ; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst( define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %in, i32* %out) { i32* %in, i32* %out) {
entry: entry:
%val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4 %val = load atomic i32, i32* %in syncscope("wavefront-one-as") seq_cst, align 4
store i32 %val, i32* %out store i32 %val, i32* %out
ret void ret void
} }
@ -374,4 +374,284 @@ entry:
ret void ret void
} }
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in monotonic, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in acquire, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
store i32 %val, i32* %out
ret void
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
i32* %in, i32* %out) {
entry:
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
store i32 %val, i32* %out
ret void
}
; Scenario: acquire atomic load at syncscope("agent"), result copied to %out.
; The expectations below require that no s_waitcnt precedes the load, that the
; flat load is emitted with glc, that the very next instruction is
; s_waitcnt vmcnt(0) lgkmcnt(0), and that (GFX8/9-specific prefix) a
; buffer_wbinvl1_vol follows immediately after that wait.
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("agent") acquire, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: seq_cst atomic load at syncscope("agent"), result copied to %out.
; The expectations below require an s_waitcnt vmcnt(0) lgkmcnt(0) directly
; ahead of the glc flat load, a second identical wait directly after it, and
; (GFX8/9-specific prefix) a buffer_wbinvl1_vol immediately following that
; second wait, all before the store of the loaded register.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: unordered atomic load at syncscope("workgroup"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load carries no glc modifier, and that no buffer_wbinvl1_vol
; shows up (GFX8/9-specific prefix) before the store of the loaded register.
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: monotonic atomic load at syncscope("workgroup"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load (checked under the GFX8/9-specific prefix) carries no glc
; modifier, and that no buffer_wbinvl1_vol shows up before the store of the
; loaded register.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: acquire atomic load at syncscope("workgroup"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load;
; under the GFX8/9-specific prefix they additionally require a flat load
; without glc, an s_waitcnt vmcnt(0) lgkmcnt(0) after it, and no
; buffer_wbinvl1_vol before the store of the loaded register.
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: seq_cst atomic load at syncscope("workgroup"), result copied to
; %out. Under the GFX8/9-specific prefix the expectations below require that
; no s_waitcnt precedes the load, that the flat load carries no glc modifier,
; that an s_waitcnt vmcnt(0) lgkmcnt(0) follows it, and that no
; buffer_wbinvl1_vol appears before the store of the loaded register.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: unordered atomic load at syncscope("wavefront"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load carries no glc modifier, and that no buffer_wbinvl1_vol
; shows up (GFX8/9-specific prefix) before the store of the loaded register.
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: monotonic atomic load at syncscope("wavefront"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load carries no glc modifier, and that no buffer_wbinvl1_vol
; shows up (GFX8/9-specific prefix) before the store of the loaded register.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: acquire atomic load at syncscope("wavefront"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load carries no glc modifier, and that no buffer_wbinvl1_vol
; shows up (GFX8/9-specific prefix) before the store of the loaded register.
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
  store i32 %loaded, i32* %out
  ret void
}
; Scenario: seq_cst atomic load at syncscope("wavefront"), result copied to
; %out. The expectations below require that no s_waitcnt precedes the load,
; that the flat load carries no glc modifier, and that no buffer_wbinvl1_vol
; shows up (GFX8/9-specific prefix) before the store of the loaded register.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(i32* %in, i32* %out) {
entry:
  %loaded = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
  store i32 %loaded, i32* %out
  ret void
}
!0 = !{i32 1} !0 = !{i32 1}

View File

@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(3)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -784,7 +784,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -806,7 +806,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -828,7 +828,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -850,7 +850,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -872,7 +872,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -894,7 +894,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -916,7 +916,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -938,7 +938,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -960,7 +960,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -982,7 +982,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1004,7 +1004,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1026,7 +1026,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1048,7 +1048,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...

View File

@ -3,12 +3,230 @@
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s ; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s ; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
; FUNC-LABEL: {{^}}system_acquire: ; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}} ; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
ret void
}
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() { define amdgpu_kernel void @system_acquire() {
entry: entry:
fence acquire fence acquire
@ -18,7 +236,7 @@ entry:
; FUNC-LABEL: {{^}}system_release: ; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_release() { define amdgpu_kernel void @system_release() {
entry: entry:
@ -29,7 +247,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel: ; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() { define amdgpu_kernel void @system_acq_rel() {
@ -41,7 +259,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst: ; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() { define amdgpu_kernel void @system_seq_cst() {
@ -93,7 +311,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire: ; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}} ; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() { define amdgpu_kernel void @agent_acquire() {
@ -105,7 +323,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release: ; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_release() { define amdgpu_kernel void @agent_release() {
entry: entry:
@ -116,7 +334,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel: ; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() { define amdgpu_kernel void @agent_acq_rel() {
@ -128,7 +346,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst: ; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}} ; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() { define amdgpu_kernel void @agent_seq_cst() {
@ -139,7 +357,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire: ; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() { define amdgpu_kernel void @workgroup_acquire() {
@ -150,7 +368,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release: ; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() { define amdgpu_kernel void @workgroup_release() {
@ -161,7 +379,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel: ; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() { define amdgpu_kernel void @workgroup_acq_rel() {
@ -172,7 +390,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst: ; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0 ; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE ; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() { define amdgpu_kernel void @workgroup_seq_cst() {

View File

@ -55,7 +55,7 @@ body: |
S_WAITCNT 127 S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`) $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952 S_WAITCNT 3952

View File

@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(2)* undef`) renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
--- ---
@ -783,7 +783,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -805,7 +805,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -827,7 +827,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -893,7 +893,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -915,7 +915,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(2)* undef`) DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -937,7 +937,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -959,7 +959,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -981,7 +981,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1003,7 +1003,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1025,7 +1025,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1047,7 +1047,7 @@ body: |
$m0 = S_MOV_B32 -1 $m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`) $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0 S_ENDPGM 0
... ...

View File

@ -5,203 +5,203 @@
declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_unordered: ; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered( define amdgpu_kernel void @system_one_as_unordered(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out unordered, align 4 store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}system_monotonic: ; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic( define amdgpu_kernel void @system_one_as_monotonic(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out monotonic, align 4 store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}system_release: ; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release( define amdgpu_kernel void @system_one_as_release(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out release, align 4 store atomic i32 %in, i32* %out syncscope("one-as") release, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}system_seq_cst: ; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst( define amdgpu_kernel void @system_one_as_seq_cst(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out seq_cst, align 4 store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_unordered: ; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered( define amdgpu_kernel void @singlethread_one_as_unordered(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4 store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_monotonic: ; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic( define amdgpu_kernel void @singlethread_one_as_monotonic(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4 store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_release: ; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release( define amdgpu_kernel void @singlethread_one_as_release(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4 store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}singlethread_seq_cst: ; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst( define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4 store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}agent_unordered: ; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered( define amdgpu_kernel void @agent_one_as_unordered(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4 store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}agent_monotonic: ; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic( define amdgpu_kernel void @agent_one_as_monotonic(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4 store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}agent_release: ; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release( define amdgpu_kernel void @agent_one_as_release(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("agent") release, align 4 store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}agent_seq_cst: ; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst( define amdgpu_kernel void @agent_one_as_seq_cst(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4 store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_unordered: ; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered( define amdgpu_kernel void @workgroup_one_as_unordered(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4 store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_monotonic: ; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic( define amdgpu_kernel void @workgroup_one_as_monotonic(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4 store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_release: ; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}} ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release( define amdgpu_kernel void @workgroup_one_as_release(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4 store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}workgroup_seq_cst: ; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}} ; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst( define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4 store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_unordered: ; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered( define amdgpu_kernel void @wavefront_one_as_unordered(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4 store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_monotonic: ; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic( define amdgpu_kernel void @wavefront_one_as_monotonic(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4 store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_release: ; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release( define amdgpu_kernel void @wavefront_one_as_release(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4 store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4
ret void ret void
} }
; GCN-LABEL: {{^}}wavefront_seq_cst: ; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst( define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32 %in, i32* %out) { i32 %in, i32* %out) {
entry: entry:
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4 store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4
ret void ret void
} }
@ -295,4 +295,204 @@ entry:
ret void ret void
} }
; Atomic i32 store through a flat pointer with `unordered` ordering and no
; syncscope operand (the default scope, which the function name calls "system").
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out unordered, align 4
ret void
}
; Atomic i32 store through a flat pointer with `monotonic` ordering and no
; syncscope operand (the default scope, which the function name calls "system").
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out monotonic, align 4
ret void
}
; Atomic i32 store through a flat pointer with `release` ordering and no
; syncscope operand (the default scope, which the function name calls "system").
; The checks below require a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; after the function label, with the flat_store_dword on the very next line;
; the store line must end right after its data register operand.
; GCN-LABEL: {{^}}system_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out release, align 4
ret void
}
; Atomic i32 store through a flat pointer with `seq_cst` ordering and no
; syncscope operand (the default scope, which the function name calls "system").
; The checks below require a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; after the function label, with the flat_store_dword on the very next line;
; the store line must end right after its data register operand.
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @system_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out seq_cst, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("singlethread") and
; `unordered` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("singlethread") and
; `monotonic` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("singlethread") and
; `release` ordering.
; Unlike the system- and agent-scope release tests in this file, the checks
; below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between the
; function label and the flat_store_dword; the store line must end right after
; its data register operand.
; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("singlethread") and
; `seq_cst` ordering.
; Unlike the system- and agent-scope seq_cst tests in this file, the checks
; below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between the
; function label and the flat_store_dword; the store line must end right after
; its data register operand.
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @singlethread_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("agent") and
; `unordered` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("agent") and
; `monotonic` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("agent") and
; `release` ordering.
; The checks below require a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; after the function label, with the flat_store_dword on the very next line;
; the store line must end right after its data register operand.
; GCN-LABEL: {{^}}agent_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") release, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("agent") and
; `seq_cst` ordering.
; The checks below require a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; after the function label, with the flat_store_dword on the very next line;
; the store line must end right after its data register operand.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @agent_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("workgroup") and
; `unordered` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("workgroup") and
; `monotonic` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("workgroup") and
; `release` ordering.
; The negative check for a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` is
; written with the GFX89 check prefix rather than the common one used by the
; neighbouring tests, so it presumably only applies to runs that enable that
; prefix -- NOTE(review): confirm against the RUN lines at the top of the file.
; The flat_store_dword is required for all runs and its line must end right
; after the data register operand.
; GCN-LABEL: {{^}}workgroup_release:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("workgroup") and
; `seq_cst` ordering.
; The negative check for a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` is
; written with the GFX89 check prefix rather than the common one used by the
; neighbouring tests, so it presumably only applies to runs that enable that
; prefix -- NOTE(review): confirm against the RUN lines at the top of the file.
; The flat_store_dword is required for all runs and its line must end right
; after the data register operand.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @workgroup_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("wavefront") and
; `unordered` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_unordered(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("wavefront") and
; `monotonic` ordering.
; The checks below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)`
; between the function label and the flat_store_dword, and require the store
; line to end right after its data register operand.
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_monotonic(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("wavefront") and
; `release` ordering.
; Unlike the system- and agent-scope release tests in this file, the checks
; below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between the
; function label and the flat_store_dword; the store line must end right after
; its data register operand.
; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_release(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
ret void
}
; Atomic i32 store through a flat pointer with syncscope("wavefront") and
; `seq_cst` ordering.
; Unlike the system- and agent-scope seq_cst tests in this file, the checks
; below forbid a line ending in `s_waitcnt vmcnt(0) lgkmcnt(0)` between the
; function label and the flat_store_dword; the store line must end right after
; its data register operand.
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @wavefront_seq_cst(
i32 %in, i32* %out) {
entry:
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
ret void
}
!0 = !{i32 1} !0 = !{i32 1}