mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AMDGPU] gfx1010 memory legalizer
Differential Revision: https://reviews.llvm.org/D61535 llvm-svn: 360087
This commit is contained in:
parent
55dff7252c
commit
32c2919cd9
@ -352,6 +352,40 @@ public:
|
||||
|
||||
};
|
||||
|
||||
class SIGfx10CacheControl : public SIGfx7CacheControl {
|
||||
protected:
|
||||
bool CuMode = false;
|
||||
|
||||
/// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
|
||||
/// is modified, false otherwise.
|
||||
bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
|
||||
return enableNamedBit<AMDGPU::OpName::dlc>(MI);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
|
||||
SIGfx7CacheControl(ST), CuMode(CuMode) {};
|
||||
|
||||
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace) const override;
|
||||
|
||||
bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
|
||||
|
||||
bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
Position Pos) const override;
|
||||
|
||||
bool insertWait(MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace,
|
||||
SIMemOp Op,
|
||||
bool IsCrossAddrSpaceOrdering,
|
||||
Position Pos) const override;
|
||||
};
|
||||
|
||||
class SIMemoryLegalizer final : public MachineFunctionPass {
|
||||
private:
|
||||
|
||||
@ -623,7 +657,9 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
|
||||
GCNSubtarget::Generation Generation = ST.getGeneration();
|
||||
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return make_unique<SIGfx6CacheControl>(ST);
|
||||
return make_unique<SIGfx7CacheControl>(ST);
|
||||
if (Generation < AMDGPUSubtarget::GFX10)
|
||||
return make_unique<SIGfx7CacheControl>(ST);
|
||||
return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
|
||||
}
|
||||
|
||||
bool SIGfx6CacheControl::enableLoadCacheBypass(
|
||||
@ -860,6 +896,231 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx10CacheControl::enableLoadCacheBypass(
|
||||
const MachineBasicBlock::iterator &MI,
|
||||
SIAtomicScope Scope,
|
||||
SIAtomicAddrSpace AddrSpace) const {
|
||||
assert(MI->mayLoad() && !MI->mayStore());
|
||||
bool Changed = false;
|
||||
|
||||
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
|
||||
/// TODO Do not set glc for rmw atomic operations as they
|
||||
/// implicitly bypass the L0/L1 caches.
|
||||
|
||||
switch (Scope) {
|
||||
case SIAtomicScope::SYSTEM:
|
||||
case SIAtomicScope::AGENT:
|
||||
Changed |= enableGLCBit(MI);
|
||||
Changed |= enableDLCBit(MI);
|
||||
break;
|
||||
case SIAtomicScope::WORKGROUP:
|
||||
// In WGP mode the waves of a work-group can be executing on either CU of
|
||||
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
|
||||
// CU mode and all waves of a work-group are on the same CU, and so the
|
||||
// L0 does not need to be bypassed.
|
||||
if (!CuMode) Changed |= enableGLCBit(MI);
|
||||
break;
|
||||
case SIAtomicScope::WAVEFRONT:
|
||||
case SIAtomicScope::SINGLETHREAD:
|
||||
// No cache to bypass.
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unsupported synchronization scope");
|
||||
}
|
||||
}
|
||||
|
||||
/// The scratch address space does not need the global memory caches
|
||||
/// to be bypassed as all memory operations by the same thread are
|
||||
/// sequentially consistent, and no other thread can access scratch
|
||||
/// memory.
|
||||
|
||||
/// Other address spaces do not hava a cache.
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool SIGfx10CacheControl::enableNonTemporal(
|
||||
const MachineBasicBlock::iterator &MI) const {
|
||||
assert(MI->mayLoad() ^ MI->mayStore());
|
||||
bool Changed = false;
|
||||
|
||||
Changed |= enableSLCBit(MI);
|
||||
/// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Inserts the cache invalidate instructions required for \p Scope and
/// \p AddrSpace either before or after \p MI, as selected by \p Pos.
///
/// \returns true if any instruction was inserted, false otherwise.
bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                                SIAtomicScope Scope,
                                                SIAtomicAddrSpace AddrSpace,
                                                Position Pos) const {
  // Read the parent block and debug location from the anchor instruction
  // before the iterator is moved.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  // New instructions are always built in front of MI, so step past the anchor
  // when they have to follow it.
  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  // Builds one invalidate instruction in front of the current position.
  auto EmitInvalidate = [&](unsigned Opcode) {
    BuildMI(Block, MI, Loc, TII->get(Opcode));
    Inserted = true;
  };

  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      EmitInvalidate(AMDGPU::BUFFER_GL0_INV);
      EmitInvalidate(AMDGPU::BUFFER_GL1_INV);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
      // in CU mode and all waves of a work-group are on the same CU, and so the
      // L0 does not need to be invalidated.
      if (!CuMode)
        EmitInvalidate(AMDGPU::BUFFER_GL0_INV);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.
  //
  // Other address spaces do not have a cache.

  // Undo the earlier step past the anchor.
  if (InsertAfter)
    --MI;

  return Inserted;
}
|
||||
|
||||
/// Inserts the wait instructions needed for the memory operations \p Op on
/// \p AddrSpace at \p Scope, either before or after \p MI as selected by
/// \p Pos.
///
/// A single S_WAITCNT covers the vmcnt and lgkmcnt counters; a separate
/// S_WAITCNT_VSCNT is used for the vscnt counter. \p IsCrossAddrSpaceOrdering
/// decides whether LDS/GDS operations need an lgkmcnt wait.
///
/// \returns true if any instruction was inserted, false otherwise.
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  // Read the parent block and debug location from the anchor instruction
  // before the iterator is moved.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  // New instructions are always built in front of MI, so step past the anchor
  // when they have to follow it.
  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  const bool OrdersLoads = (Op & SIMemOp::LOAD) != SIMemOp::NONE;
  const bool OrdersStores = (Op & SIMemOp::STORE) != SIMemOp::NONE;

  // Counters that have to reach zero.
  bool WaitVM = false;
  bool WaitVS = false;
  bool WaitLGKM = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitVM = OrdersLoads;
      WaitVS = OrdersStores;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode and all waves of a work-group are on the same CU
      // which shares the same L0.
      WaitVM = !CuMode && OrdersLoads;
      WaitVS = !CuMode && OrdersStores;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an LDS waitcnt is not
      // needed as LDS operations for all waves are executed in a
      // total global ordering as observed by all waves. Required if
      // also synchronizing with global/GDS memory as LDS operations
      // could be reordered with respect to later global/GDS memory
      // operations of the same wave.
      if (IsCrossAddrSpaceOrdering)
        WaitLGKM = true;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS waitcnt is not
      // needed as GDS operations for all waves are executed in a
      // total global ordering as observed by all waves. Required if
      // also synchronizing with global/LDS memory as GDS operations
      // could be reordered with respect to later global/LDS memory
      // operations of the same wave.
      if (IsCrossAddrSpaceOrdering)
        WaitLGKM = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  bool Inserted = false;

  if (WaitVM || WaitLGKM) {
    // A counter that needs no wait gets its full bit mask; one that must
    // drain gets zero.
    const unsigned VmField = WaitVM ? 0 : getVmcntBitMask(IV);
    const unsigned ExpField = getExpcntBitMask(IV);
    const unsigned LgkmField = WaitLGKM ? 0 : getLgkmcntBitMask(IV);
    const unsigned Encoded =
        AMDGPU::encodeWaitcnt(IV, VmField, ExpField, LgkmField);
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::S_WAITCNT)).addImm(Encoded);
    Inserted = true;
  }

  if (WaitVS) {
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::S_WAITCNT_VSCNT))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Inserted = true;
  }

  // Undo the earlier step past the anchor.
  if (InsertAfter)
    --MI;

  return Inserted;
}
|
||||
|
||||
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
|
||||
if (AtomicPseudoMIs.empty())
|
||||
return false;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,8 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX6,GFX68 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
|
||||
|
||||
; FUNC-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
@ -9,7 +11,15 @@
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
@ -20,7 +30,12 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
@ -31,9 +46,16 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_one_as_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
@ -44,9 +66,16 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
@ -57,6 +86,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
@ -67,6 +100,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
@ -77,6 +114,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
@ -87,6 +128,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
@ -100,7 +145,15 @@ entry:
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
@ -111,7 +164,12 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
@ -122,9 +180,16 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_one_as_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
@ -135,53 +200,99 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Kernel whose body is a single seq_cst fence at the "agent-one-as" sync scope.
define amdgpu_kernel void @agent_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Kernel whose body is a single acquire fence at the "workgroup-one-as" sync scope.
define amdgpu_kernel void @workgroup_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NOT: buffer_gl0_inv
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Kernel whose body is a single release fence at the "workgroup-one-as" sync scope.
define amdgpu_kernel void @workgroup_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Kernel whose body is a single acq_rel fence at the "workgroup-one-as" sync scope.
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
@ -192,6 +303,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
@ -202,6 +317,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_release() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
@ -212,6 +331,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
@ -222,6 +345,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
@ -235,7 +362,15 @@ entry:
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_acquire() {
|
||||
entry:
|
||||
fence acquire
|
||||
@ -245,8 +380,15 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_release() {
|
||||
entry:
|
||||
fence release
|
||||
@ -256,10 +398,19 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_acq_rel() {
|
||||
entry:
|
||||
fence acq_rel
|
||||
@ -269,10 +420,19 @@ entry:
|
||||
; FUNC-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel system_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_seq_cst() {
|
||||
entry:
|
||||
fence seq_cst
|
||||
@ -283,6 +443,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_acquire() {
|
||||
entry:
|
||||
fence syncscope("singlethread") acquire
|
||||
@ -293,6 +457,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_release() {
|
||||
entry:
|
||||
fence syncscope("singlethread") release
|
||||
@ -303,6 +471,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("singlethread") acq_rel
|
||||
@ -313,6 +485,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel singlethread_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("singlethread") seq_cst
|
||||
@ -326,7 +502,15 @@ entry:
|
||||
; GFX6-NEXT: buffer_wbinvl1{{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_acquire() {
|
||||
entry:
|
||||
fence syncscope("agent") acquire
|
||||
@ -336,8 +520,15 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_release:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_release() {
|
||||
entry:
|
||||
fence syncscope("agent") release
|
||||
@ -347,10 +538,19 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel
|
||||
@ -360,54 +560,102 @@ entry:
|
||||
; FUNC-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX6: buffer_wbinvl1{{$}}
|
||||
; GFX8: buffer_wbinvl1_vol{{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10-NEXT: buffer_gl1_inv{{$}}
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel agent_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Test kernel with no arguments whose body is a single fence followed by a
; return. The fence uses the "agent" synchronization scope with seq_cst
; ordering; the FileCheck directives preceding this definition describe the
; machine code expected for it under each check prefix.
define amdgpu_kernel void @agent_seq_cst() {
|
||||
entry:
|
||||
; The only operation under test: a sequentially consistent, agent-scope fence.
fence syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Test kernel with no arguments whose body is a single fence followed by a
; return. The fence uses the "workgroup" synchronization scope with acquire
; ordering. The directives preceding this definition expect waits and a
; buffer_gl0_inv under the GFX10WGP prefix, and expect their absence under the
; GFX10CU prefix.
define amdgpu_kernel void @workgroup_acquire() {
|
||||
entry:
|
||||
; The only operation under test: an acquire fence at workgroup scope.
fence syncscope("workgroup") acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_release:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10-NOT: buffer_gl0_inv
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Test kernel with no arguments whose body is a single fence followed by a
; return. The fence uses the "workgroup" synchronization scope with release
; ordering. The directives preceding this definition expect waits under the
; GFX10WGP prefix, none under the GFX10CU prefix, and no buffer_gl0_inv for
; either GFX10 configuration.
define amdgpu_kernel void @workgroup_release() {
|
||||
entry:
|
||||
; The only operation under test: a release fence at workgroup scope.
fence syncscope("workgroup") release
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_acq_rel:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
; Test kernel with no arguments whose body is a single fence followed by a
; return. The fence uses the "workgroup" synchronization scope with acq_rel
; ordering. The directives preceding this definition expect waits and a
; buffer_gl0_inv under the GFX10WGP prefix, and expect their absence under the
; GFX10CU prefix.
define amdgpu_kernel void @workgroup_acq_rel() {
|
||||
entry:
|
||||
; The only operation under test: an acquire-release fence at workgroup scope.
fence syncscope("workgroup") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; FUNC-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GCN: %bb.0
|
||||
; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: buffer_gl0_inv{{$}}
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel workgroup_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst
|
||||
@ -418,6 +666,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_acquire() {
|
||||
entry:
|
||||
fence syncscope("wavefront") acquire
|
||||
@ -428,6 +680,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_release() {
|
||||
entry:
|
||||
fence syncscope("wavefront") release
|
||||
@ -438,6 +694,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_acq_rel
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_acq_rel() {
|
||||
entry:
|
||||
fence syncscope("wavefront") acq_rel
|
||||
@ -448,6 +708,10 @@ entry:
|
||||
; GCN: %bb.0
|
||||
; GCN-NOT: ATOMIC_FENCE
|
||||
; GCN: s_endpgm
|
||||
; GFX10: .amdhsa_kernel wavefront_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_seq_cst() {
|
||||
entry:
|
||||
fence syncscope("wavefront") seq_cst
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,6 @@
|
||||
; RUN: not llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported atomic synchronization scope
|
||||
define amdgpu_kernel void @invalid_fence() {
|
||||
|
@ -2,15 +2,24 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -21,10 +30,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -35,10 +52,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -49,10 +74,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -63,10 +96,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -77,10 +117,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -91,10 +138,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -105,10 +159,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -119,10 +180,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -133,10 +201,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -147,10 +223,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -161,10 +245,18 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -175,10 +267,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -187,12 +286,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -201,12 +309,23 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10CU-NOT: buffer_gl0_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -215,12 +334,26 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10CU-NOT: buffer_gl0_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -231,10 +364,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -245,10 +385,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -259,10 +406,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -273,10 +427,17 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -287,6 +448,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_private_0:
|
||||
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
|
||||
; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_private_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_private_0(
|
||||
i32 addrspace(5)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -297,6 +463,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_private_1:
|
||||
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
|
||||
; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_private_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_private_1(
|
||||
i32 addrspace(5)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -309,6 +480,10 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_global_0:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_global_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_global_0(
|
||||
i32 addrspace(1)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -320,6 +495,11 @@ entry:
|
||||
; GCN-LABEL: {{^}}nontemporal_global_1:
|
||||
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
||||
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
||||
; GFX10: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_global_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_global_1(
|
||||
i32 addrspace(1)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -332,6 +512,10 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_local_0:
|
||||
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_local_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_local_0(
|
||||
i32 addrspace(3)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -342,6 +526,10 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_local_1:
|
||||
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_local_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_local_1(
|
||||
i32 addrspace(3)* %in, i32* %out) {
|
||||
entry:
|
||||
@ -354,6 +542,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_flat_0:
|
||||
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
||||
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_flat_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_flat_0(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -364,6 +557,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_flat_1:
|
||||
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
||||
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_flat_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_flat_1(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -375,11 +573,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -389,11 +594,19 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -403,11 +616,20 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -417,11 +639,21 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel system_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -431,11 +663,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -445,11 +684,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -459,11 +705,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -473,11 +726,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel singlethread_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -487,11 +747,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -501,11 +768,19 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -515,11 +790,20 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -529,11 +813,21 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
|
||||
; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NEXT: buffer_wbinvl1_vol
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10-NEXT: buffer_gl0_inv
|
||||
; GFX10-NEXT: buffer_gl1_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel agent_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -543,11 +837,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -556,12 +857,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -570,12 +880,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_acquire:
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10CU-NOT: buffer_gl0_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -584,12 +903,25 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10CU-NOT: buffer_gl0_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel workgroup_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -599,11 +931,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -613,11 +952,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -627,11 +973,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_acquire:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_acquire
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_acquire(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -641,11 +994,18 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GFX89-NOT: buffer_wbinvl1_vol
|
||||
; GFX10-NOT: buffer_gl{{[01]}}_inv
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
; GFX10: .amdhsa_kernel wavefront_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
|
@ -2,12 +2,19 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -17,7 +24,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -27,7 +39,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -37,7 +54,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -47,7 +69,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -57,7 +84,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -67,7 +99,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -77,7 +114,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -87,7 +129,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -97,7 +144,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -107,7 +159,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_release:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -117,7 +174,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -127,7 +189,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -137,7 +204,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -145,9 +217,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -155,9 +235,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -167,7 +255,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -177,7 +270,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -187,7 +285,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -197,7 +300,12 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -207,6 +315,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_private_0:
|
||||
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
|
||||
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_private_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_private_0(
|
||||
i32* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
@ -217,6 +330,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_private_1:
|
||||
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
|
||||
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_private_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_private_1(
|
||||
i32* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
@ -230,6 +348,11 @@ entry:
|
||||
; GCN-LABEL: {{^}}nontemporal_global_0:
|
||||
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
||||
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
|
||||
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_global_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_global_0(
|
||||
i32* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
@ -241,6 +364,11 @@ entry:
|
||||
; GCN-LABEL: {{^}}nontemporal_global_1:
|
||||
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
||||
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
||||
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_global_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_global_1(
|
||||
i32* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
@ -253,6 +381,10 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_local_0:
|
||||
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_local_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_local_0(
|
||||
i32* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
@ -263,6 +395,10 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_local_1:
|
||||
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_local_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_local_1(
|
||||
i32* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
@ -275,6 +411,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_flat_0:
|
||||
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
||||
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_flat_0
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_flat_0(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -285,6 +426,11 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}nontemporal_flat_1:
|
||||
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
||||
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
|
||||
; GFX10: .amdhsa_kernel nontemporal_flat_1
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @nontemporal_flat_1(
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
@ -296,8 +442,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -306,8 +457,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -316,8 +472,14 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_release:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -326,8 +488,14 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}system_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel system_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @system_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -336,8 +504,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -346,8 +519,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -356,8 +534,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -366,8 +549,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel singlethread_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @singlethread_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -376,8 +564,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -386,8 +579,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -396,8 +594,14 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_release:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -406,8 +610,14 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}agent_seq_cst:
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
||||
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel agent_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @agent_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -416,8 +626,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -426,8 +641,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -435,9 +655,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GCN-LABEL: {{^}}workgroup_release:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -445,9 +673,17 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
||||
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel workgroup_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @workgroup_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -456,8 +692,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_unordered:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_unordered
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_unordered(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -466,8 +707,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_monotonic:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_monotonic
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_monotonic(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -476,8 +722,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_release:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_release
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_release(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
@ -486,8 +737,13 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
||||
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
||||
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
||||
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX10: .amdhsa_kernel wavefront_seq_cst
|
||||
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
||||
; GFX10-NOT: .amdhsa_memory_ordered 0
|
||||
define amdgpu_kernel void @wavefront_seq_cst(
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user