mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AMDGPU: Add support for cross address space synchronization scopes
Differential Revision: https://reviews.llvm.org/D59517 llvm-svn: 356946
This commit is contained in:
parent
a23410c78e
commit
5a27d2d078
@ -323,62 +323,80 @@ is conservatively correct for OpenCL.
|
||||
.. table:: AMDHSA LLVM Sync Scopes
|
||||
:name: amdgpu-amdhsa-llvm-sync-scopes-table
|
||||
|
||||
================ ==========================================================
|
||||
LLVM Sync Scope Description
|
||||
================ ==========================================================
|
||||
*none* The default: ``system``.
|
||||
======================= ===================================================
|
||||
LLVM Sync Scope Description
|
||||
======================= ===================================================
|
||||
*none* The default: ``system``.
|
||||
|
||||
Synchronizes with, and participates in modification and
|
||||
seq_cst total orderings with, other operations (except
|
||||
image operations) for all address spaces (except private,
|
||||
or generic that accesses private) provided the other
|
||||
operation's sync scope is:
|
||||
Synchronizes with, and participates in modification
|
||||
and seq_cst total orderings with, other operations
|
||||
(except image operations) for all address spaces
|
||||
(except private, or generic that accesses private)
|
||||
provided the other operation's sync scope is:
|
||||
|
||||
- ``system``.
|
||||
- ``agent`` and executed by a thread on the same agent.
|
||||
- ``workgroup`` and executed by a thread in the same
|
||||
workgroup.
|
||||
- ``wavefront`` and executed by a thread in the same
|
||||
wavefront.
|
||||
- ``system``.
|
||||
- ``agent`` and executed by a thread on the same
|
||||
agent.
|
||||
- ``workgroup`` and executed by a thread in the
|
||||
same workgroup.
|
||||
- ``wavefront`` and executed by a thread in the
|
||||
same wavefront.
|
||||
|
||||
``agent`` Synchronizes with, and participates in modification and
|
||||
seq_cst total orderings with, other operations (except
|
||||
image operations) for all address spaces (except private,
|
||||
or generic that accesses private) provided the other
|
||||
operation's sync scope is:
|
||||
``agent`` Synchronizes with, and participates in modification
|
||||
and seq_cst total orderings with, other operations
|
||||
(except image operations) for all address spaces
|
||||
(except private, or generic that accesses private)
|
||||
provided the other operation's sync scope is:
|
||||
|
||||
- ``system`` or ``agent`` and executed by a thread on the
|
||||
same agent.
|
||||
- ``workgroup`` and executed by a thread in the same
|
||||
workgroup.
|
||||
- ``wavefront`` and executed by a thread in the same
|
||||
wavefront.
|
||||
- ``system`` or ``agent`` and executed by a thread
|
||||
on the same agent.
|
||||
- ``workgroup`` and executed by a thread in the
|
||||
same workgroup.
|
||||
- ``wavefront`` and executed by a thread in the
|
||||
same wavefront.
|
||||
|
||||
``workgroup`` Synchronizes with, and participates in modification and
|
||||
seq_cst total orderings with, other operations (except
|
||||
image operations) for all address spaces (except private,
|
||||
or generic that accesses private) provided the other
|
||||
operation's sync scope is:
|
||||
``workgroup`` Synchronizes with, and participates in modification
|
||||
and seq_cst total orderings with, other operations
|
||||
(except image operations) for all address spaces
|
||||
(except private, or generic that accesses private)
|
||||
provided the other operation's sync scope is:
|
||||
|
||||
- ``system``, ``agent`` or ``workgroup`` and executed by a
|
||||
thread in the same workgroup.
|
||||
- ``wavefront`` and executed by a thread in the same
|
||||
wavefront.
|
||||
- ``system``, ``agent`` or ``workgroup`` and
|
||||
executed by a thread in the same workgroup.
|
||||
- ``wavefront`` and executed by a thread in the
|
||||
same wavefront.
|
||||
|
||||
``wavefront`` Synchronizes with, and participates in modification and
|
||||
seq_cst total orderings with, other operations (except
|
||||
image operations) for all address spaces (except private,
|
||||
or generic that accesses private) provided the other
|
||||
operation's sync scope is:
|
||||
``wavefront`` Synchronizes with, and participates in modification
|
||||
and seq_cst total orderings with, other operations
|
||||
(except image operations) for all address spaces
|
||||
(except private, or generic that accesses private)
|
||||
provided the other operation's sync scope is:
|
||||
|
||||
- ``system``, ``agent``, ``workgroup`` or ``wavefront``
|
||||
and executed by a thread in the same wavefront.
|
||||
- ``system``, ``agent``, ``workgroup`` or
|
||||
``wavefront`` and executed by a thread in the
|
||||
same wavefront.
|
||||
|
||||
``singlethread`` Only synchronizes with, and participates in modification
|
||||
and seq_cst total orderings with, other operations (except
|
||||
image operations) running in the same thread for all
|
||||
address spaces (for example, in signal handlers).
|
||||
================ ==========================================================
|
||||
``singlethread`` Only synchronizes with, and participates in
|
||||
modification and seq_cst total orderings with,
|
||||
other operations (except image operations) running
|
||||
in the same thread for all address spaces (for
|
||||
example, in signal handlers).
|
||||
|
||||
``one-as`` Same as ``system`` but only synchronizes with other
|
||||
operations within the same address space.
|
||||
|
||||
``agent-one-as`` Same as ``agent`` but only synchronizes with other
|
||||
operations within the same address space.
|
||||
|
||||
``workgroup-one-as`` Same as ``workgroup`` but only synchronizes with
|
||||
other operations within the same address space.
|
||||
|
||||
``wavefront-one-as`` Same as ``wavefront`` but only synchronizes with
|
||||
other operations within the same address space.
|
||||
|
||||
``singlethread-one-as`` Same as ``singlethread`` but only synchronizes with
|
||||
other operations within the same address space.
|
||||
======================= ===================================================
|
||||
|
||||
AMDGPU Intrinsics
|
||||
-----------------
|
||||
|
@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
|
||||
AgentSSID = CTX.getOrInsertSyncScopeID("agent");
|
||||
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
|
||||
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
|
||||
SystemOneAddressSpaceSSID =
|
||||
CTX.getOrInsertSyncScopeID("one-as");
|
||||
AgentOneAddressSpaceSSID =
|
||||
CTX.getOrInsertSyncScopeID("agent-one-as");
|
||||
WorkgroupOneAddressSpaceSSID =
|
||||
CTX.getOrInsertSyncScopeID("workgroup-one-as");
|
||||
WavefrontOneAddressSpaceSSID =
|
||||
CTX.getOrInsertSyncScopeID("wavefront-one-as");
|
||||
SingleThreadOneAddressSpaceSSID =
|
||||
CTX.getOrInsertSyncScopeID("singlethread-one-as");
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -29,12 +29,22 @@ private:
|
||||
// All supported memory/synchronization scopes can be found here:
|
||||
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
|
||||
|
||||
/// Agent synchronization scope ID.
|
||||
/// Agent synchronization scope ID (cross address space).
|
||||
SyncScope::ID AgentSSID;
|
||||
/// Workgroup synchronization scope ID.
|
||||
/// Workgroup synchronization scope ID (cross address space).
|
||||
SyncScope::ID WorkgroupSSID;
|
||||
/// Wavefront synchronization scope ID.
|
||||
/// Wavefront synchronization scope ID (cross address space).
|
||||
SyncScope::ID WavefrontSSID;
|
||||
/// System synchronization scope ID (single address space).
|
||||
SyncScope::ID SystemOneAddressSpaceSSID;
|
||||
/// Agent synchronization scope ID (single address space).
|
||||
SyncScope::ID AgentOneAddressSpaceSSID;
|
||||
/// Workgroup synchronization scope ID (single address space).
|
||||
SyncScope::ID WorkgroupOneAddressSpaceSSID;
|
||||
/// Wavefront synchronization scope ID (single address space).
|
||||
SyncScope::ID WavefrontOneAddressSpaceSSID;
|
||||
/// Single thread synchronization scope ID (single address space).
|
||||
SyncScope::ID SingleThreadOneAddressSpaceSSID;
|
||||
|
||||
/// In AMDGPU target synchronization scopes are inclusive, meaning a
|
||||
/// larger synchronization scope is inclusive of a smaller synchronization
|
||||
@ -43,35 +53,70 @@ private:
|
||||
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
|
||||
/// supported by the AMDGPU target.
|
||||
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
  // A scope and its single address space ("-one-as") counterpart share the
  // same inclusion ordering value; larger scopes get larger values. Whether
  // a scope is restricted to one address space is answered separately by
  // isOneAddressSpace().
  if (SSID == SyncScope::SingleThread ||
      SSID == getSingleThreadOneAddressSpaceSSID())
    return 0;
  if (SSID == getWavefrontSSID() ||
      SSID == getWavefrontOneAddressSpaceSSID())
    return 1;
  if (SSID == getWorkgroupSSID() ||
      SSID == getWorkgroupOneAddressSpaceSSID())
    return 2;
  if (SSID == getAgentSSID() ||
      SSID == getAgentOneAddressSpaceSSID())
    return 3;
  if (SSID == SyncScope::System ||
      SSID == getSystemOneAddressSpaceSSID())
    return 4;

  // Any other synchronization scope has no inclusion ordering here.
  return None;
}
|
||||
|
||||
/// \returns True if \p SSID is restricted to single address space, false
|
||||
/// otherwise
|
||||
bool isOneAddressSpace(SyncScope::ID SSID) const {
|
||||
return SSID == getSingleThreadOneAddressSpaceSSID() ||
|
||||
SSID == getWavefrontOneAddressSpaceSSID() ||
|
||||
SSID == getWorkgroupOneAddressSpaceSSID() ||
|
||||
SSID == getAgentOneAddressSpaceSSID() ||
|
||||
SSID == getSystemOneAddressSpaceSSID();
|
||||
}
|
||||
|
||||
public:
|
||||
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
|
||||
|
||||
/// \returns The agent synchronization scope ID held in AgentSSID. This is
|
||||
/// the cross address space variant of the agent scope.
|
||||
SyncScope::ID getAgentSSID() const {
|
||||
return AgentSSID;
|
||||
}
|
||||
/// \returns The workgroup synchronization scope ID held in WorkgroupSSID.
|
||||
/// This is the cross address space variant of the workgroup scope.
|
||||
SyncScope::ID getWorkgroupSSID() const {
|
||||
return WorkgroupSSID;
|
||||
}
|
||||
/// \returns The wavefront synchronization scope ID held in WavefrontSSID.
|
||||
/// This is the cross address space variant of the wavefront scope.
|
||||
SyncScope::ID getWavefrontSSID() const {
|
||||
return WavefrontSSID;
|
||||
}
|
||||
/// \returns The system synchronization scope ID that is restricted to a
/// single address space, as held in SystemOneAddressSpaceSSID.
|
||||
SyncScope::ID getSystemOneAddressSpaceSSID() const {
|
||||
return SystemOneAddressSpaceSSID;
|
||||
}
|
||||
/// \returns The agent synchronization scope ID that is restricted to a
/// single address space, as held in AgentOneAddressSpaceSSID.
|
||||
SyncScope::ID getAgentOneAddressSpaceSSID() const {
|
||||
return AgentOneAddressSpaceSSID;
|
||||
}
|
||||
/// \returns The workgroup synchronization scope ID that is restricted to a
/// single address space, as held in WorkgroupOneAddressSpaceSSID.
|
||||
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
|
||||
return WorkgroupOneAddressSpaceSSID;
|
||||
}
|
||||
/// \returns The wavefront synchronization scope ID that is restricted to a
/// single address space, as held in WavefrontOneAddressSpaceSSID.
|
||||
SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
|
||||
return WavefrontOneAddressSpaceSSID;
|
||||
}
|
||||
/// \returns The single thread synchronization scope ID that is restricted
/// to a single address space, as held in SingleThreadOneAddressSpaceSSID.
|
||||
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
|
||||
return SingleThreadOneAddressSpaceSSID;
|
||||
}
|
||||
|
||||
/// In AMDGPU target synchronization scopes are inclusive, meaning a
|
||||
/// larger synchronization scope is inclusive of a smaller synchronization
|
||||
@ -87,7 +132,11 @@ public:
|
||||
if (!AIO || !BIO)
|
||||
return None;
|
||||
|
||||
return AIO.getValue() > BIO.getValue();
|
||||
bool IsAOneAddressSpace = isOneAddressSpace(A);
|
||||
bool IsBOneAddressSpace = isOneAddressSpace(B);
|
||||
|
||||
return AIO.getValue() >= BIO.getValue() &&
|
||||
(IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
|
||||
/// Translates the LLVM synchronization scope \p SSID into a tuple of
/// (SI atomic scope, address spaces to order, cross address space flag).
///
/// For the cross address space scopes the address space component is all of
/// SIAtomicAddrSpace::ATOMIC and the flag is true. For the single address
/// space ("-one-as") scopes the address space component is narrowed to
/// SIAtomicAddrSpace::ATOMIC & \p InstrScope and the flag is false.
///
/// \returns None if \p SSID is not one of the scopes handled here.
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrScope) const {
  // Cross address space scopes.
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC,
                           true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC,
                           true);

  // Single address space scopes. Each one must be tested on its own: nesting
  // it under the matching cross address space check would make it
  // unreachable, because the two scope IDs are distinct.
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);

  return None;
}
|
||||
|
||||
@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
||||
|
||||
bool VMCnt = false;
|
||||
bool LGKMCnt = false;
|
||||
bool EXPCnt = false;
|
||||
|
||||
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
|
||||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
VMCnt = true;
|
||||
VMCnt |= true;
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
||||
// also synchronizing with global/GDS memory as LDS operations
|
||||
// could be reordered with respect to later global/GDS memory
|
||||
// operations of the same wave.
|
||||
LGKMCnt = IsCrossAddrSpaceOrdering;
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
case SIAtomicScope::SINGLETHREAD:
|
||||
@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
||||
// also synchronizing with global/LDS memory as GDS operations
|
||||
// could be reordered with respect to later global/LDS memory
|
||||
// operations of the same wave.
|
||||
EXPCnt = IsCrossAddrSpaceOrdering;
|
||||
LGKMCnt |= IsCrossAddrSpaceOrdering;
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
|
||||
}
|
||||
}
|
||||
|
||||
if (VMCnt || LGKMCnt || EXPCnt) {
|
||||
if (VMCnt || LGKMCnt) {
|
||||
unsigned WaitCntImmediate =
|
||||
AMDGPU::encodeWaitcnt(IV,
|
||||
VMCnt ? 0 : getVmcntBitMask(IV),
|
||||
EXPCnt ? 0 : getExpcntBitMask(IV),
|
||||
getExpcntBitMask(IV),
|
||||
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
|
||||
Changed = true;
|
||||
|
@ -12,8 +12,10 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_not_b32_e32 v1, v2
|
||||
; GCN-NEXT: v_or_b32_e32 v1, -5, v1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: buffer_wbinvl1_vol
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
|
||||
|
@ -27,9 +27,9 @@ bb:
|
||||
%tmp1 = zext i32 %tmp to i64
|
||||
%tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp
|
||||
%tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4
|
||||
fence syncscope("workgroup") release
|
||||
fence syncscope("workgroup-one-as") release
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
fence syncscope("workgroup") acquire
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
%tmp4 = add nsw i32 %tmp3, %tmp3
|
||||
%tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
|
||||
%tmp6 = add nsw i32 %tmp5, %tmp4
|
||||
|
@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(float addrspace(3)* %ptr) nounwind {
|
||||
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
||||
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
|
||||
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
||||
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
|
||||
; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
||||
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
||||
%idx.add = add nuw i32 %idx, 4
|
||||
@ -49,6 +49,27 @@ define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
|
||||
; VI-DAG: s_mov_b32 m0
|
||||
; GFX9-NOT: m0
|
||||
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
|
||||
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
|
||||
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
|
||||
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
|
||||
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
|
||||
define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
|
||||
%idx.add = add nuw i32 %idx, 4
|
||||
%shl0 = shl i32 %idx.add, 3
|
||||
%shl1 = shl i32 %idx.add, 4
|
||||
%ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
|
||||
%ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
|
||||
%a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
|
||||
%a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
|
||||
%a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 syncscope("one-as") seq_cst
|
||||
store float %a3, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
|
||||
; GCN: ds_read_b64
|
||||
; GCN: v_add_f64
|
||||
|
@ -6,7 +6,7 @@
|
||||
; FUNC-LABEL: {{^}}system_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_acquire() {
|
||||
@ -18,7 +18,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_release() {
|
||||
entry:
|
||||
@ -29,7 +29,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_acq_rel() {
|
||||
@ -41,7 +41,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_seq_cst() {
|
||||
@ -50,6 +50,53 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
@ -90,10 +137,50 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_acquire() {
|
||||
@ -105,7 +192,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_release() {
|
||||
entry:
|
||||
@ -116,7 +203,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_acq_rel() {
|
||||
@ -128,7 +215,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_seq_cst() {
|
||||
@ -137,9 +224,56 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acquire() {
|
||||
@ -150,7 +284,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_release() {
|
||||
@ -161,7 +295,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acq_rel() {
|
||||
@ -172,7 +306,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_seq_cst() {
|
||||
@ -181,6 +315,50 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
@ -220,3 +398,43 @@ entry:
|
||||
fence syncscope("wavefront") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
|
||||
|
||||
; FUNC-LABEL: {{^}}system_acquire:
|
||||
; FUNC-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GFX6: s_waitcnt vmcnt(0){{$}}
|
||||
@ -10,6 +10,232 @@
|
||||
; GFX8: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GFX6: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_acquire() {
|
||||
entry:
|
||||
fence acquire
|
||||
@ -19,7 +245,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_release() {
|
||||
entry:
|
||||
@ -30,7 +256,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
@ -43,7 +269,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
@ -96,9 +322,9 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GFX6: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_acquire() {
|
||||
@ -110,7 +336,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_release() {
|
||||
entry:
|
||||
@ -121,7 +347,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
@ -134,7 +360,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GCN: s_endpgm
|
||||
@ -146,7 +372,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acquire() {
|
||||
@ -157,7 +383,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_release() {
|
||||
@ -168,7 +394,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acq_rel() {
|
||||
@ -179,7 +405,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_seq_cst() {
|
||||
|
@ -104,7 +104,7 @@ body: |
|
||||
S_WAITCNT 127
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
S_WAITCNT 3952
|
||||
BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep)
|
||||
BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from %ir.gep)
|
||||
|
||||
bb.2.exit:
|
||||
liveins: $sgpr2_sgpr3
|
||||
|
@ -1,11 +1,311 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_one_as_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_one_as_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_one_as_release(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_acq_rel:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_one_as_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_one_as_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_one_as_release(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_one_as_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_one_as_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_one_as_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_one_as_release(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_acq_rel:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_one_as_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_one_as_release(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_one_as_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_one_as_release(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_one_as_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
entry:
|
||||
@ -14,9 +314,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
@ -26,9 +326,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_release(
|
||||
i32* %out, i32 %in) {
|
||||
@ -38,9 +338,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_acq_rel:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
@ -50,9 +350,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
@ -62,9 +362,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
@ -74,9 +374,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
@ -86,9 +386,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_release(
|
||||
i32* %out, i32 %in) {
|
||||
@ -98,9 +398,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_acq_rel:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
@ -110,9 +410,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
@ -122,9 +422,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
@ -134,9 +434,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
@ -146,9 +446,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_release(
|
||||
i32* %out, i32 %in) {
|
||||
@ -158,9 +458,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_acq_rel:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
@ -170,9 +470,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
@ -182,9 +482,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
@ -194,9 +494,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
@ -206,9 +506,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_release:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_release(
|
||||
i32* %out, i32 %in) {
|
||||
@ -218,9 +518,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
@ -230,9 +530,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
@ -242,9 +542,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
i32* %out, i32 %in) {
|
||||
@ -254,9 +554,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_acquire(
|
||||
i32* %out, i32 %in) {
|
||||
@ -266,9 +566,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_release(
|
||||
i32* %out, i32 %in) {
|
||||
@ -278,9 +578,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_acq_rel:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_acq_rel(
|
||||
i32* %out, i32 %in) {
|
||||
@ -290,9 +590,9 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: buffer_wbinvl1_vol
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
i32* %out, i32 %in) {
|
||||
|
@ -11,7 +11,7 @@ body: |
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
|
||||
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load seq_cst 4 from `i32 addrspace(42)* undef`)
|
||||
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -30,7 +30,7 @@ body: |
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`)
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
|
||||
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -63,7 +63,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`)
|
||||
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -5,282 +5,282 @@
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-LABEL: {{^}}system_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
define amdgpu_kernel void @system_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in unordered, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("one-as") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
define amdgpu_kernel void @system_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in monotonic, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("one-as") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_acquire:
|
||||
; GCN-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_acquire(
|
||||
define amdgpu_kernel void @system_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in acquire, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("one-as") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
define amdgpu_kernel void @system_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in seq_cst, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("one-as") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
define amdgpu_kernel void @singlethread_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_acquire:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_acquire(
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread-one-as") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-LABEL: {{^}}agent_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
define amdgpu_kernel void @agent_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("agent-one-as") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
define amdgpu_kernel void @agent_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_acquire:
|
||||
; GCN-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_acquire(
|
||||
define amdgpu_kernel void @agent_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("agent-one-as") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("agent-one-as") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
define amdgpu_kernel void @workgroup_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_acquire(
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup-one-as") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
define amdgpu_kernel void @wavefront_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_acquire:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_acquire(
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront-one-as") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
@ -374,4 +374,284 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
|
||||
store i32 %val, i32* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{i32 1}
|
||||
|
@ -17,7 +17,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -41,7 +41,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -65,7 +65,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -89,7 +89,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -113,7 +113,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -137,7 +137,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -161,7 +161,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -185,7 +185,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -209,7 +209,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -233,7 +233,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -257,7 +257,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -281,7 +281,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -305,7 +305,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -329,7 +329,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -353,7 +353,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -377,7 +377,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -401,7 +401,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -425,7 +425,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -449,7 +449,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -473,7 +473,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -498,7 +498,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -520,7 +520,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -542,7 +542,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -564,7 +564,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -586,7 +586,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -608,7 +608,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -630,7 +630,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -652,7 +652,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -674,7 +674,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -696,7 +696,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -718,7 +718,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -740,7 +740,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -762,7 +762,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -784,7 +784,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -806,7 +806,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -828,7 +828,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -850,7 +850,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -872,7 +872,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -894,7 +894,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -916,7 +916,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -938,7 +938,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -960,7 +960,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -982,7 +982,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1004,7 +1004,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1026,7 +1026,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1048,7 +1048,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -3,12 +3,230 @@
|
||||
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
|
||||
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
|
||||
|
||||
; FUNC-LABEL: {{^}}system_acquire:
|
||||
; FUNC-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}system_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_acquire() {
|
||||
entry:
|
||||
fence acquire
|
||||
@ -18,7 +236,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_release() {
|
||||
entry:
|
||||
@ -29,7 +247,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_acq_rel() {
|
||||
@ -41,7 +259,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @system_seq_cst() {
|
||||
@ -93,7 +311,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_acquire() {
|
||||
@ -105,7 +323,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_release() {
|
||||
entry:
|
||||
@ -116,7 +334,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_acq_rel() {
|
||||
@ -128,7 +346,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: buffer_wbinvl1{{$}}
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @agent_seq_cst() {
|
||||
@ -139,7 +357,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acquire() {
|
||||
@ -150,7 +368,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_release() {
|
||||
@ -161,7 +379,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_acq_rel() {
|
||||
@ -172,7 +390,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @workgroup_seq_cst() {
|
||||
|
@ -55,7 +55,7 @@ body: |
|
||||
S_WAITCNT 127
|
||||
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
|
||||
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
|
||||
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
|
||||
S_WAITCNT 3952
|
||||
|
@ -17,7 +17,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -41,7 +41,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -65,7 +65,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -89,7 +89,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -113,7 +113,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -137,7 +137,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -161,7 +161,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -185,7 +185,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -209,7 +209,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -233,7 +233,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -257,7 +257,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -281,7 +281,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -305,7 +305,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -329,7 +329,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -353,7 +353,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -377,7 +377,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -401,7 +401,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -425,7 +425,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -449,7 +449,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -473,7 +473,7 @@ body: |
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
|
||||
@ -498,7 +498,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -520,7 +520,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -542,7 +542,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -564,7 +564,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -586,7 +586,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -608,7 +608,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -630,7 +630,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -652,7 +652,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -674,7 +674,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -696,7 +696,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -718,7 +718,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -740,7 +740,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -762,7 +762,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
@ -783,7 +783,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -805,7 +805,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -827,7 +827,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -893,7 +893,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -915,7 +915,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -937,7 +937,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -959,7 +959,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -981,7 +981,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1003,7 +1003,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1025,7 +1025,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -1047,7 +1047,7 @@ body: |
|
||||
$m0 = S_MOV_B32 -1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
$vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -5,203 +5,203 @@
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-LABEL: {{^}}system_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
define amdgpu_kernel void @system_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out unordered, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
define amdgpu_kernel void @system_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out monotonic, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_release:
|
||||
; GCN-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_release(
|
||||
define amdgpu_kernel void @system_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out release, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("one-as") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
define amdgpu_kernel void @system_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out seq_cst, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
define amdgpu_kernel void @singlethread_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_release:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_release(
|
||||
define amdgpu_kernel void @singlethread_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-LABEL: {{^}}agent_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
define amdgpu_kernel void @agent_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
define amdgpu_kernel void @agent_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_release:
|
||||
; GCN-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_release(
|
||||
define amdgpu_kernel void @agent_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") release, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
define amdgpu_kernel void @workgroup_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_release:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_release(
|
||||
define amdgpu_kernel void @workgroup_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
define amdgpu_kernel void @wavefront_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_release:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_release(
|
||||
define amdgpu_kernel void @wavefront_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -295,4 +295,204 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_release:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_release:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{i32 1}
|
||||
|
Loading…
Reference in New Issue
Block a user