1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AMDGPU] gfx1010 memory legalizer

Differential Revision: https://reviews.llvm.org/D61535

llvm-svn: 360087
This commit is contained in:
Stanislav Mekhanoshin 2019-05-06 21:57:02 +00:00
parent 55dff7252c
commit 32c2919cd9
7 changed files with 4909 additions and 1009 deletions

View File

@ -352,6 +352,40 @@ public:
};
/// Cache control for GFX10. Derives from the GFX7 implementation, adds
/// support for the DLC bit and overrides the hooks whose behavior depends on
/// whether the subtarget runs in CU mode.
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  /// Value supplied at construction. The overrides below skip the work-group
  /// scope cache handling when this is true (CU mode) and perform it when it
  /// is false (WGP mode).
  bool CuMode = false;

  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::dlc>(MI);
  }

public:
  /// \param ST subtarget forwarded to the SIGfx7CacheControl base.
  /// \param CuMode stored in the CuMode member.
  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
    SIGfx7CacheControl(ST), CuMode(CuMode) {}

  /// Sets the cache bypass bits (GLC, and DLC for agent/system scope) on the
  /// load \p MI as required by \p Scope and \p AddrSpace. Returns true if
  /// \p MI is modified.
  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  /// Sets the SLC bit on \p MI. Returns true if \p MI is modified.
  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;

  /// Inserts the BUFFER_GL0_INV / BUFFER_GL1_INV instructions required by
  /// \p Scope and \p AddrSpace before or after \p MI according to \p Pos.
  /// Returns true if any instruction is inserted.
  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const override;

  /// Inserts the S_WAITCNT and/or S_WAITCNT_VSCNT instructions required by
  /// \p Scope, \p AddrSpace, \p Op and \p IsCrossAddrSpaceOrdering before or
  /// after \p MI according to \p Pos. Returns true if any instruction is
  /// inserted.
  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
@ -623,7 +657,9 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return make_unique<SIGfx6CacheControl>(ST);
return make_unique<SIGfx7CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)
return make_unique<SIGfx7CacheControl>(ST);
return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
@ -860,6 +896,231 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
return Changed;
}
/// Sets the cache bypass bits on the load \p MI that are needed for an access
/// to \p AddrSpace at synchronization scope \p Scope. Returns true if \p MI
/// is modified, false otherwise.
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  /// Only the global address space needs cache bypassing here. The scratch
  /// address space does not need the global memory caches to be bypassed as
  /// all memory operations by the same thread are sequentially consistent,
  /// and no other thread can access scratch memory. Other address spaces do
  /// not have a cache.
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (!TouchesGlobal)
    return false;

  /// TODO Do not set glc for rmw atomic operations as they
  /// implicitly bypass the L0/L1 caches.

  bool Modified = false;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
  case SIAtomicScope::AGENT:
    // Both bits are always attempted, GLC first and then DLC.
    if (enableGLCBit(MI))
      Modified = true;
    if (enableDLCBit(MI))
      Modified = true;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP, so the per-CU L0 has to be bypassed. In CU mode all waves of a
    // work-group are on the same CU and the L0 does not need to be bypassed.
    if (!CuMode && enableGLCBit(MI))
      Modified = true;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to bypass.
    break;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  return Modified;
}
/// Marks \p MI as non-temporal by setting its SLC bit. \p MI must be either a
/// load or a store, but not both. Returns true if \p MI is modified, false
/// otherwise.
bool SIGfx10CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() != MI->mayStore());

  /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
  return enableSLCBit(MI);
}
/// Inserts the cache invalidate instructions needed for \p AddrSpace at
/// synchronization scope \p Scope. With Position::AFTER the instructions are
/// placed after \p MI, otherwise in front of it. Returns true if any
/// instruction is inserted, false otherwise.
bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                                SIAtomicScope Scope,
                                                SIAtomicAddrSpace AddrSpace,
                                                Position Pos) const {
  // Read the parent block and debug location before MI is possibly advanced.
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  // Builds one instruction with the given opcode in front of MI.
  auto EmitInvalidate = [&](unsigned Opcode) {
    BuildMI(MBB, MI, DL, TII->get(Opcode));
    Inserted = true;
  };

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory. Other address spaces do not have a cache.
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      EmitInvalidate(AMDGPU::BUFFER_GL0_INV);
      EmitInvalidate(AMDGPU::BUFFER_GL1_INV);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 has to be invalidated. In CU mode all
      // waves of a work-group are on the same CU and the L0 does not need to
      // be invalidated.
      if (!CuMode)
        EmitInvalidate(AMDGPU::BUFFER_GL0_INV);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // Undo the advance performed above.
  if (InsertAfter)
    --MI;

  return Inserted;
}
/// Inserts the wait instructions needed to order the memory operations \p Op
/// on \p AddrSpace at synchronization scope \p Scope. With Position::AFTER the
/// instructions are placed after \p MI, otherwise in front of it.
/// \p IsCrossAddrSpaceOrdering requests an LGKM wait for LDS/GDS at the
/// scopes handled below. Returns true if any instruction is inserted, false
/// otherwise.
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  // Read the parent block and debug location before MI is possibly advanced.
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  const bool OrdersLoads = (Op & SIMemOp::LOAD) != SIMemOp::NONE;
  const bool OrdersStores = (Op & SIMemOp::STORE) != SIMemOp::NONE;

  // Which counters have to be waited on.
  bool WaitVM = false;
  bool WaitVS = false;
  bool WaitLGKM = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitVM = OrdersLoads;
      WaitVS = OrdersStores;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP. Operations therefore have to complete so that they are
      // visible to waves in the other CU, as the L0 is per CU. In CU mode all
      // waves of a work-group are on the same CU which shares the same L0.
      WaitVM = !CuMode && OrdersLoads;
      WaitVS = !CuMode && OrdersStores;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering an LDS waitcnt is not needed,
      // as LDS operations for all waves are executed in a total global
      // ordering as observed by all waves. It is required when also
      // synchronizing with global/GDS memory, as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      if (IsCrossAddrSpaceOrdering)
        WaitLGKM = true;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Without cross address space ordering a GDS waitcnt is not needed,
      // as GDS operations for all waves are executed in a total global
      // ordering as observed by all waves. It is required when also
      // synchronizing with global/LDS memory, as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      if (IsCrossAddrSpaceOrdering)
        WaitLGKM = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  bool Inserted = false;

  if (WaitVM || WaitLGKM) {
    // A counter that has to be waited on is encoded as 0; the remaining
    // fields are filled with their bit mask.
    const unsigned VmcntField = WaitVM ? 0 : getVmcntBitMask(IV);
    const unsigned ExpcntField = getExpcntBitMask(IV);
    const unsigned LgkmcntField = WaitLGKM ? 0 : getLgkmcntBitMask(IV);
    const unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VmcntField, ExpcntField, LgkmcntField);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Inserted = true;
  }

  if (WaitVS) {
    // The store counter gets its own instruction, emitted after S_WAITCNT.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Inserted = true;
  }

  // Undo the advance performed above.
  if (InsertAfter)
    --MI;

  return Inserted;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
if (AtomicPseudoMIs.empty())
return false;

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,8 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX6,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
@ -9,7 +11,15 @@
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GFX10: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
@ -20,7 +30,12 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
@ -31,9 +46,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_one_as_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
@ -44,9 +66,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
@ -57,6 +86,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
@ -67,6 +100,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
@ -77,6 +114,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
@ -87,6 +128,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
@ -100,7 +145,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GFX10: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
@ -111,7 +164,12 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
@ -122,9 +180,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_one_as_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
@ -135,53 +200,99 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single seq_cst fence at the "agent-one-as" sync
; scope; the check lines above describe the expected output for it.
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single acquire fence at the "workgroup-one-as" sync
; scope; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NOT: buffer_gl0_inv
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single release fence at the "workgroup-one-as" sync
; scope; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single acq_rel fence at the "workgroup-one-as" sync
; scope; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
@ -192,6 +303,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
@ -202,6 +317,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
@ -212,6 +331,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
@ -222,6 +345,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
@ -235,7 +362,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire() {
entry:
fence acquire
@ -245,8 +380,15 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_release() {
entry:
fence release
@ -256,10 +398,19 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acq_rel() {
entry:
fence acq_rel
@ -269,10 +420,19 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel system_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst() {
entry:
fence seq_cst
@ -283,6 +443,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire() {
entry:
fence syncscope("singlethread") acquire
@ -293,6 +457,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release() {
entry:
fence syncscope("singlethread") release
@ -303,6 +471,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acq_rel() {
entry:
fence syncscope("singlethread") acq_rel
@ -313,6 +485,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel singlethread_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst() {
entry:
fence syncscope("singlethread") seq_cst
@ -326,7 +502,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire() {
entry:
fence syncscope("agent") acquire
@ -336,8 +520,15 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release() {
entry:
fence syncscope("agent") release
@ -347,10 +538,19 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acq_rel() {
entry:
fence syncscope("agent") acq_rel
@ -360,54 +560,102 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GFX10-NEXT: buffer_gl0_inv{{$}}
; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
; GFX10: .amdhsa_kernel agent_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single seq_cst fence at the "agent" sync scope; the
; check lines above describe the expected output for it.
define amdgpu_kernel void @agent_seq_cst() {
entry:
fence syncscope("agent") seq_cst
ret void
}
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single acquire fence at the "workgroup" sync scope;
; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_acquire() {
entry:
fence syncscope("workgroup") acquire
ret void
}
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10-NOT: buffer_gl0_inv
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single release fence at the "workgroup" sync scope;
; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_release() {
entry:
fence syncscope("workgroup") release
ret void
}
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
; Kernel whose body is a single acq_rel fence at the "workgroup" sync scope;
; the check lines above describe the expected output for it.
define amdgpu_kernel void @workgroup_acq_rel() {
entry:
fence syncscope("workgroup") acq_rel
ret void
}
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: buffer_gl0_inv{{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel workgroup_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst() {
entry:
fence syncscope("workgroup") seq_cst
@ -418,6 +666,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire() {
entry:
fence syncscope("wavefront") acquire
@ -428,6 +680,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release() {
entry:
fence syncscope("wavefront") release
@ -438,6 +694,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_acq_rel
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel() {
entry:
fence syncscope("wavefront") acq_rel
@ -448,6 +708,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
; GFX10: .amdhsa_kernel wavefront_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst() {
entry:
fence syncscope("wavefront") seq_cst

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
; RUN: not llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported atomic synchronization scope
define amdgpu_kernel void @invalid_fence() {

View File

@ -2,15 +2,24 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_unordered(
i32* %in, i32* %out) {
entry:
@ -21,10 +30,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@ -35,10 +52,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire(
i32* %in, i32* %out) {
entry:
@ -49,10 +74,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@ -63,10 +96,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_unordered(
i32* %in, i32* %out) {
entry:
@ -77,10 +117,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@ -91,10 +138,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %in, i32* %out) {
entry:
@ -105,10 +159,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@ -119,10 +180,17 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_unordered(
i32* %in, i32* %out) {
entry:
@ -133,10 +201,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@ -147,10 +223,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acquire(
i32* %in, i32* %out) {
entry:
@ -161,10 +245,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@ -175,10 +267,17 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_unordered(
i32* %in, i32* %out) {
entry:
@ -187,12 +286,21 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Checks for kernel @workgroup_one_as_monotonic (presumably a workgroup-scope
; monotonic atomic load followed by a store; the IR body is outside this view).
; GFX8/GFX9 and gfx10 CU mode expect a plain flat load, gfx10 WGP mode expects
; the load to carry glc. No zero-count wait and no cache invalidate may appear
; before the store of the loaded value.
; The CU-mode load is matched positively (anchored at end of line, so a glc
; suffix still fails) because a variable defined inside a NOT directive is
; never bound, which would leave RET holding a value from an earlier function.
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@ -201,12 +309,23 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Checks for kernel @workgroup_one_as_acquire (presumably a workgroup-scope
; acquire atomic load followed by a store; the IR body is outside this view).
; gfx10 WGP mode expects a glc load, then a vmcnt(0) wait and a buffer_gl0_inv.
; GFX8/GFX9 and gfx10 CU mode expect a plain load with no such wait and no
; invalidate.
; The CU-mode load is matched positively (anchored at end of line, so a glc
; suffix still fails) because a variable defined inside a NOT directive is
; never bound, which would leave RET holding a value from an earlier function.
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT: buffer_gl0_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %in, i32* %out) {
entry:
@ -215,12 +334,26 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT: buffer_gl0_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@ -231,10 +364,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_unordered(
i32* %in, i32* %out) {
entry:
@ -245,10 +385,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@ -259,10 +406,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_one_as_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %in, i32* %out) {
entry:
@ -273,10 +427,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@ -287,6 +448,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_0:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_private_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_0(
i32 addrspace(5)* %in, i32* %out) {
entry:
@ -297,6 +463,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_1:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_private_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_1(
i32 addrspace(5)* %in, i32* %out) {
entry:
@ -309,6 +480,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_0:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_0(
i32 addrspace(1)* %in, i32* %out) {
entry:
@ -320,6 +495,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_1(
i32 addrspace(1)* %in, i32* %out) {
entry:
@ -332,6 +512,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_0:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel nontemporal_local_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_0(
i32 addrspace(3)* %in, i32* %out) {
entry:
@ -342,6 +526,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_1:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel nontemporal_local_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_1(
i32 addrspace(3)* %in, i32* %out) {
entry:
@ -354,6 +542,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_0:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_flat_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_0(
i32* %in, i32* %out) {
entry:
@ -364,6 +557,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_1:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_flat_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_1(
i32* %in, i32* %out) {
entry:
@ -375,11 +573,18 @@ entry:
}
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_unordered(
i32* %in, i32* %out) {
entry:
@ -389,11 +594,19 @@ entry:
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_monotonic(
i32* %in, i32* %out) {
entry:
@ -403,11 +616,20 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire(
i32* %in, i32* %out) {
entry:
@ -417,11 +639,21 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel system_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
i32* %in, i32* %out) {
entry:
@ -431,11 +663,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_unordered(
i32* %in, i32* %out) {
entry:
@ -445,11 +684,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
i32* %in, i32* %out) {
entry:
@ -459,11 +705,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire(
i32* %in, i32* %out) {
entry:
@ -473,11 +726,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel singlethread_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
i32* %in, i32* %out) {
entry:
@ -487,11 +747,18 @@ entry:
}
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_unordered(
i32* %in, i32* %out) {
entry:
@ -501,11 +768,19 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
i32* %in, i32* %out) {
entry:
@ -515,11 +790,20 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire(
i32* %in, i32* %out) {
entry:
@ -529,11 +813,21 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10-NEXT: buffer_gl0_inv
; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel agent_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
i32* %in, i32* %out) {
entry:
@ -543,11 +837,18 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_unordered(
i32* %in, i32* %out) {
entry:
@ -556,12 +857,21 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
i32* %in, i32* %out) {
entry:
@ -570,12 +880,21 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GCN-LABEL: {{^}}workgroup_acquire:
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT: buffer_gl0_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire(
i32* %in, i32* %out) {
entry:
@ -584,12 +903,25 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10WGP-NEXT: buffer_gl0_inv
; GFX10CU-NOT: buffer_gl0_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel workgroup_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
i32* %in, i32* %out) {
entry:
@ -599,11 +931,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_unordered(
i32* %in, i32* %out) {
entry:
@ -613,11 +952,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
i32* %in, i32* %out) {
entry:
@ -627,11 +973,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_acquire
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire(
i32* %in, i32* %out) {
entry:
@ -641,11 +994,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; GFX10: .amdhsa_kernel wavefront_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
i32* %in, i32* %out) {
entry:

View File

@ -2,12 +2,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_unordered(
i32 %in, i32* %out) {
entry:
@ -17,7 +24,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@ -27,7 +39,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release(
i32 %in, i32* %out) {
entry:
@ -37,7 +54,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@ -47,7 +69,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_unordered(
i32 %in, i32* %out) {
entry:
@ -57,7 +84,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@ -67,7 +99,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release(
i32 %in, i32* %out) {
entry:
@ -77,7 +114,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@ -87,7 +129,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_unordered(
i32 %in, i32* %out) {
entry:
@ -97,7 +144,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@ -107,7 +159,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_release(
i32 %in, i32* %out) {
entry:
@ -117,7 +174,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@ -127,7 +189,12 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_unordered(
i32 %in, i32* %out) {
entry:
@ -137,7 +204,12 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@ -145,9 +217,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_release(
i32 %in, i32* %out) {
entry:
@ -155,9 +235,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@ -167,7 +255,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_one_as_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_unordered(
i32 %in, i32* %out) {
entry:
@ -177,7 +270,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@ -187,7 +285,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_one_as_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_release(
i32 %in, i32* %out) {
entry:
@ -197,7 +300,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@ -207,6 +315,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_0:
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_private_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_0(
i32* %in, i32 addrspace(5)* %out) {
entry:
@ -217,6 +330,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_1:
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_private_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_1(
i32* %in, i32 addrspace(5)* %out) {
entry:
@ -230,6 +348,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_0:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_0(
i32* %in, i32 addrspace(1)* %out) {
entry:
@ -241,6 +364,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_1(
i32* %in, i32 addrspace(1)* %out) {
entry:
@ -253,6 +381,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_0:
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel nontemporal_local_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_0(
i32* %in, i32 addrspace(3)* %out) {
entry:
@ -263,6 +395,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_1:
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel nontemporal_local_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_1(
i32* %in, i32 addrspace(3)* %out) {
entry:
@ -275,6 +411,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_0:
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_flat_0
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_0(
i32* %in, i32* %out) {
entry:
@ -285,6 +426,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_1:
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_flat_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_1(
i32* %in, i32* %out) {
entry:
@ -296,8 +442,13 @@ entry:
}
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_unordered(
i32 %in, i32* %out) {
entry:
@ -306,8 +457,13 @@ entry:
}
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_monotonic(
i32 %in, i32* %out) {
entry:
@ -316,8 +472,14 @@ entry:
}
; GCN-LABEL: {{^}}system_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_release(
i32 %in, i32* %out) {
entry:
@ -326,8 +488,14 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel system_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
i32 %in, i32* %out) {
entry:
@ -336,8 +504,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_unordered(
i32 %in, i32* %out) {
entry:
@ -346,8 +519,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
i32 %in, i32* %out) {
entry:
@ -356,8 +534,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release(
i32 %in, i32* %out) {
entry:
@ -366,8 +549,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel singlethread_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
i32 %in, i32* %out) {
entry:
@ -376,8 +564,13 @@ entry:
}
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_unordered(
i32 %in, i32* %out) {
entry:
@ -386,8 +579,13 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
i32 %in, i32* %out) {
entry:
@ -396,8 +594,14 @@ entry:
}
; GCN-LABEL: {{^}}agent_release:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release(
i32 %in, i32* %out) {
entry:
@ -406,8 +610,14 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel agent_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
i32 %in, i32* %out) {
entry:
@ -416,8 +626,13 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_unordered(
i32 %in, i32* %out) {
entry:
@ -426,8 +641,13 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
i32 %in, i32* %out) {
entry:
@ -435,9 +655,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_release:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GCN-LABEL: {{^}}workgroup_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_release(
i32 %in, i32* %out) {
entry:
@ -445,9 +673,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel workgroup_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
i32 %in, i32* %out) {
entry:
@ -456,8 +692,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_unordered
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_unordered(
i32 %in, i32* %out) {
entry:
@ -466,8 +707,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_monotonic
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
i32 %in, i32* %out) {
entry:
@ -476,8 +722,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_release
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release(
i32 %in, i32* %out) {
entry:
@ -486,8 +737,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX10: .amdhsa_kernel wavefront_seq_cst
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
i32 %in, i32* %out) {
entry: