mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Correct rmw atomics s_waitcnt generation
The AMD GPU SIMemoryLegalizer was using the ordering address space rather than the instruction address space when determining the s_waitcnt to generate to ensure that a read-modify-write atomic has completed. This resulted in additional unnecessary counters being waited on. Differential Revision: https://reviews.llvm.org/D96743
This commit is contained in:
parent
bb770362c3
commit
efe0f85e7b
@ -1407,7 +1407,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
|
||||
MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
|
||||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
|
||||
Changed |= CC->insertWait(MI, MOI.getScope(),
|
||||
MOI.getOrderingAddrSpace(),
|
||||
MOI.getInstrAddrSpace(),
|
||||
isAtomicRet(*MI) ? SIMemOp::LOAD :
|
||||
SIMemOp::STORE,
|
||||
MOI.getIsCrossAddressSpaceOrdering(),
|
||||
|
@ -928,7 +928,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* in
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
@ -943,7 +943,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* in
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
|
||||
@ -966,7 +966,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
@ -984,7 +984,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
|
||||
@ -1003,7 +1003,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %p
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1018,7 +1018,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %p
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1038,7 +1038,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1053,7 +1053,7 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1075,7 +1075,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* i
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1092,7 +1092,7 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* i
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1113,7 +1113,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inre
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1129,7 +1129,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inre
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1154,7 +1154,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1173,7 +1173,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1194,7 +1194,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1209,7 +1209,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1230,7 +1230,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1245,7 +1245,7 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
@ -1268,7 +1268,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inr
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
@ -1285,7 +1285,7 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inr
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
|
@ -117,7 +117,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u32 v1, v2, v1
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB0_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -147,7 +147,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u32 v1, v2, v1
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB0_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -285,7 +285,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u32 v1, v1, v2
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB1_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -319,7 +319,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u32 v1, v1, v2
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB1_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -503,7 +503,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB2_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -554,7 +554,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB2_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -739,7 +739,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB3_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -790,7 +790,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB3_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -975,7 +975,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB4_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1026,7 +1026,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB4_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1166,7 +1166,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB5_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1198,7 +1198,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB5_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1372,7 +1372,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB6_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1413,7 +1413,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB6_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1494,7 +1494,7 @@ define amdgpu_kernel void @add_i64_varying(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_add_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
@ -1509,7 +1509,7 @@ define amdgpu_kernel void @add_i64_varying(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_add_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
@ -1629,7 +1629,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u32 v1, v2, v1
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB8_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1659,7 +1659,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u32 v1, v2, v1
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB8_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1797,7 +1797,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u32 v1, v1, v2
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB9_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -1831,7 +1831,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u32 v1, v1, v2
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB9_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2015,7 +2015,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB10_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2066,7 +2066,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB10_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2208,7 +2208,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB11_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2242,7 +2242,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB11_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2418,7 +2418,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB12_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2459,7 +2459,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u64 v[1:2], v3, v[1:2]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB12_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2540,7 +2540,7 @@ define amdgpu_kernel void @sub_i64_varying(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
@ -2555,7 +2555,7 @@ define amdgpu_kernel void @sub_i64_varying(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
|
||||
; GFX1032-NEXT: s_endpgm
|
||||
@ -2732,7 +2732,7 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_and_rtn_b32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB14_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2783,7 +2783,7 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_and_rtn_b32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB14_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -2968,7 +2968,7 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_or_rtn_b32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB15_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3019,7 +3019,7 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_or_rtn_b32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB15_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3204,7 +3204,7 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_xor_rtn_b32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB16_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3255,7 +3255,7 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_xor_rtn_b32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB16_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3442,7 +3442,7 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_max_rtn_i32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB17_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3495,7 +3495,7 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_max_rtn_i32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB17_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3638,7 +3638,7 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB18_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3671,7 +3671,7 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB18_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3861,7 +3861,7 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_min_rtn_i32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB19_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -3914,7 +3914,7 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_min_rtn_i32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB19_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4057,7 +4057,7 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB20_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4090,7 +4090,7 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB20_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4278,7 +4278,7 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_max_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB21_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4329,7 +4329,7 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_max_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB21_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4469,7 +4469,7 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB22_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4502,7 +4502,7 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB22_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4690,7 +4690,7 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_min_rtn_u32 v0, v7, v4
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB23_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4741,7 +4741,7 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_min_rtn_u32 v0, v7, v4
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB23_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4881,7 +4881,7 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1064-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: buffer_gl0_inv
|
||||
; GFX1064-NEXT: BB24_2:
|
||||
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
|
||||
@ -4914,7 +4914,7 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1032-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1]
|
||||
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: buffer_gl0_inv
|
||||
; GFX1032-NEXT: BB24_2:
|
||||
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
|
||||
|
@ -42,7 +42,7 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
|
||||
; GCN-NEXT: v_or_b32_e32 v2, -5, v2
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_wbinvl1_vol
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
|
||||
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -647,7 +647,7 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -660,7 +660,7 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -673,7 +673,6 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -688,7 +687,6 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -703,7 +701,7 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -790,7 +788,7 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -804,7 +802,7 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -819,7 +817,6 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -836,7 +833,6 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -852,7 +848,7 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -871,7 +867,7 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -885,7 +881,7 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -900,7 +896,6 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -917,7 +912,6 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -933,7 +927,7 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -951,7 +945,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -965,7 +959,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -979,7 +973,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -994,7 +988,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1009,7 +1003,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1030,7 +1024,7 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -1045,7 +1039,7 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -1061,7 +1055,7 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1078,7 +1072,7 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1094,7 +1088,7 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1115,7 +1109,7 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -1130,7 +1124,7 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -1146,7 +1140,7 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1163,7 +1157,7 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1179,7 +1173,7 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1269,7 +1263,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1285,7 +1279,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1299,7 +1293,6 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1315,7 +1308,6 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1331,7 +1323,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1428,7 +1420,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1445,7 +1437,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1461,7 +1453,6 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1479,7 +1470,6 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1496,7 +1486,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1517,7 +1507,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1534,7 +1524,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1550,7 +1540,6 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1568,7 +1557,6 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1585,7 +1573,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1605,7 +1593,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1621,7 +1609,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1635,7 +1623,6 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1651,7 +1638,6 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1667,7 +1653,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1688,7 +1674,7 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1705,7 +1691,7 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1721,7 +1707,6 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1739,7 +1724,6 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1756,7 +1740,7 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1777,7 +1761,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1794,7 +1778,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1810,7 +1794,6 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1828,7 +1811,6 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1845,7 +1827,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1866,7 +1848,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1883,7 +1865,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1899,7 +1881,6 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1917,7 +1898,6 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1934,7 +1914,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1955,7 +1935,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1972,7 +1952,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1988,7 +1968,6 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -2006,7 +1985,6 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -2023,7 +2001,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -2043,7 +2021,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2060,7 +2038,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2077,7 +2055,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2093,7 +2071,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2109,7 +2087,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2133,7 +2111,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2151,7 +2129,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2170,7 +2148,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2188,7 +2166,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2205,7 +2183,7 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2229,7 +2207,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2247,7 +2225,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2266,7 +2244,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2284,7 +2262,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2301,7 +2279,7 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2324,7 +2302,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2341,7 +2319,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2358,7 +2336,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2374,7 +2352,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2390,7 +2368,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2414,7 +2392,7 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2432,7 +2410,7 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2451,7 +2429,7 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2469,7 +2447,7 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2486,7 +2464,7 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2510,7 +2488,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2528,7 +2506,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2547,7 +2525,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2565,7 +2543,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2582,7 +2560,7 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2606,7 +2584,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2624,7 +2602,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2643,7 +2621,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2661,7 +2639,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2678,7 +2656,7 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2702,7 +2680,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2720,7 +2698,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2739,7 +2717,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2757,7 +2735,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2774,7 +2752,7 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
|
@ -647,7 +647,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -660,7 +660,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -673,7 +673,6 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -688,7 +687,6 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -703,7 +701,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -790,7 +788,7 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -804,7 +802,7 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -819,7 +817,6 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -836,7 +833,6 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -852,7 +848,7 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -871,7 +867,7 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -885,7 +881,7 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -900,7 +896,6 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -917,7 +912,6 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -933,7 +927,7 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -951,7 +945,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -965,7 +959,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -979,7 +973,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -994,7 +988,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1009,7 +1003,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1030,7 +1024,7 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -1045,7 +1039,7 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -1061,7 +1055,7 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1078,7 +1072,7 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1094,7 +1088,7 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1115,7 +1109,7 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -1130,7 +1124,7 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -1146,7 +1140,7 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1163,7 +1157,7 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
@ -1179,7 +1173,7 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1269,7 +1263,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1285,7 +1279,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1299,7 +1293,6 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1315,7 +1308,6 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1331,7 +1323,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1428,7 +1420,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1445,7 +1437,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1461,7 +1453,6 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1479,7 +1470,6 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1496,7 +1486,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1517,7 +1507,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1534,7 +1524,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1550,7 +1540,6 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1568,7 +1557,6 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1585,7 +1573,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1605,7 +1593,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1621,7 +1609,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1635,7 +1623,6 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1651,7 +1638,6 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1667,7 +1653,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1688,7 +1674,7 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1705,7 +1691,7 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1721,7 +1707,6 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1739,7 +1724,6 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1756,7 +1740,7 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1777,7 +1761,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1794,7 +1778,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1810,7 +1794,6 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1828,7 +1811,6 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1845,7 +1827,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1866,7 +1848,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1883,7 +1865,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1899,7 +1881,6 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -1917,7 +1898,6 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -1934,7 +1914,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1955,7 +1935,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1972,7 +1952,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1988,7 +1968,6 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
@ -2006,7 +1985,6 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
@ -2023,7 +2001,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -2043,7 +2021,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2060,7 +2038,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2077,7 +2055,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2093,7 +2071,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2109,7 +2087,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2133,7 +2111,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2151,7 +2129,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2170,7 +2148,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2188,7 +2166,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2205,7 +2183,7 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2229,7 +2207,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2247,7 +2225,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2266,7 +2244,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2284,7 +2262,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2301,7 +2279,7 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2324,7 +2302,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2341,7 +2319,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2358,7 +2336,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2374,7 +2352,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2390,7 +2368,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2414,7 +2392,7 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2432,7 +2410,7 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2451,7 +2429,7 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2469,7 +2447,7 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2486,7 +2464,7 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2510,7 +2488,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2528,7 +2506,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2547,7 +2525,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2565,7 +2543,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2582,7 +2560,7 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2606,7 +2584,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2624,7 +2602,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2643,7 +2621,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2661,7 +2639,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2678,7 +2656,7 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2702,7 +2680,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
@ -2720,7 +2698,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1_vol
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -2739,7 +2717,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2757,7 +2735,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: buffer_gl0_inv
|
||||
; GFX10-CU-NEXT: buffer_gl1_inv
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
@ -2774,7 +2752,7 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
|
@ -634,7 +634,6 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acquire_atomicrmw:
|
||||
@ -646,7 +645,6 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acquire_atomicrmw:
|
||||
@ -658,7 +656,6 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -672,7 +669,6 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_atomicrmw:
|
||||
@ -684,7 +680,6 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -770,7 +765,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acq_rel_atomicrmw:
|
||||
@ -783,7 +777,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acq_rel_atomicrmw:
|
||||
@ -797,7 +790,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -812,7 +804,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_atomicrmw:
|
||||
@ -825,7 +816,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -844,7 +834,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_seq_cst_atomicrmw:
|
||||
@ -857,7 +846,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_seq_cst_atomicrmw:
|
||||
@ -871,7 +859,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -886,7 +873,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_atomicrmw:
|
||||
@ -899,7 +885,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -917,7 +902,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -930,7 +915,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -943,7 +928,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -957,7 +942,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -970,7 +955,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -991,7 +976,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1005,7 +990,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1020,7 +1005,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1035,7 +1020,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -1049,7 +1034,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1070,7 +1055,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1084,7 +1069,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
@ -1099,7 +1084,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1114,7 +1099,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -1128,7 +1113,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in) {
|
||||
@ -1218,7 +1203,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
|
||||
@ -1233,7 +1217,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
|
||||
@ -1246,7 +1229,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1261,7 +1243,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
|
||||
@ -1274,7 +1255,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1370,7 +1350,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
|
||||
@ -1386,7 +1365,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
|
||||
@ -1401,7 +1379,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1417,7 +1394,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
|
||||
@ -1431,7 +1407,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1452,7 +1427,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
|
||||
@ -1468,7 +1442,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
|
||||
@ -1483,7 +1456,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1499,7 +1471,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
|
||||
@ -1513,7 +1484,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1533,7 +1503,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acquire_acquire_cmpxchg:
|
||||
@ -1548,7 +1517,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acquire_acquire_cmpxchg:
|
||||
@ -1561,7 +1529,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1576,7 +1543,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_cmpxchg:
|
||||
@ -1589,7 +1555,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1610,7 +1575,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_release_acquire_cmpxchg:
|
||||
@ -1626,7 +1590,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_release_acquire_cmpxchg:
|
||||
@ -1641,7 +1604,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1657,7 +1619,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_cmpxchg:
|
||||
@ -1671,7 +1632,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1692,7 +1652,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
|
||||
@ -1708,7 +1667,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
|
||||
@ -1723,7 +1681,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1739,7 +1696,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
|
||||
@ -1753,7 +1709,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1774,7 +1729,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
|
||||
@ -1790,7 +1744,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
|
||||
@ -1805,7 +1758,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1821,7 +1773,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
|
||||
@ -1835,7 +1786,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1856,7 +1806,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
|
||||
@ -1872,7 +1821,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-WGP-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
|
||||
@ -1887,7 +1835,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1903,7 +1850,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
|
||||
@ -1917,7 +1863,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
@ -1937,7 +1882,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -1953,7 +1898,6 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -1970,7 +1914,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1985,7 +1929,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -1999,7 +1943,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2023,7 +1967,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2040,7 +1984,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2059,7 +2002,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2075,7 +2018,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2090,7 +2033,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2114,7 +2057,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2131,7 +2074,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2150,7 +2092,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2166,7 +2108,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2181,7 +2123,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2204,7 +2146,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2220,7 +2162,6 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2237,7 +2178,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2252,7 +2193,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2266,7 +2207,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2290,7 +2231,7 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2307,7 +2248,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2326,7 +2266,7 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2342,7 +2282,7 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2357,7 +2297,7 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2381,7 +2321,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2398,7 +2338,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2417,7 +2356,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2433,7 +2372,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2448,7 +2387,7 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2472,7 +2411,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2489,7 +2428,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2508,7 +2446,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2524,7 +2462,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2539,7 +2477,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
@ -2563,7 +2501,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
@ -2580,7 +2518,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2599,7 +2536,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2615,7 +2552,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1]
|
||||
; GFX10-CU-NEXT: s_endpgm
|
||||
;
|
||||
@ -2630,7 +2567,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %out, i32 %in, i32 %old) {
|
||||
|
@ -595,7 +595,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -721,7 +721,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -788,7 +788,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -853,7 +853,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -924,7 +924,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -997,7 +997,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1130,7 +1130,6 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1271,7 +1270,6 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1346,7 +1344,6 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1417,7 +1414,6 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1490,7 +1486,6 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1565,7 +1560,6 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1640,7 +1634,6 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1715,7 +1708,6 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1787,7 +1779,7 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1866,7 +1858,7 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1947,7 +1939,7 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2024,7 +2016,7 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2103,7 +2095,7 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2184,7 +2176,7 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2265,7 +2257,7 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2346,7 +2338,7 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
|
@ -595,7 +595,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -721,7 +721,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -788,7 +788,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -853,7 +853,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -924,7 +924,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -997,7 +997,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1130,7 +1130,6 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1271,7 +1270,6 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1346,7 +1344,6 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1417,7 +1414,6 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1490,7 +1486,6 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1565,7 +1560,6 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1640,7 +1634,6 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1715,7 +1708,6 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1787,7 +1779,7 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1866,7 +1858,7 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1947,7 +1939,7 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2024,7 +2016,7 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2103,7 +2095,7 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2184,7 +2176,7 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2265,7 +2257,7 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2346,7 +2338,7 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
|
@ -595,7 +595,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -721,7 +721,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -788,7 +788,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -853,7 +853,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -924,7 +924,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -997,7 +997,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1130,7 +1130,6 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1271,7 +1270,6 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1346,7 +1344,6 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1417,7 +1414,6 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1490,7 +1486,6 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1565,7 +1560,6 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1640,7 +1634,6 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1715,7 +1708,6 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
;
|
||||
@ -1787,7 +1779,7 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1866,7 +1858,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -1947,7 +1939,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2024,7 +2016,7 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2103,7 +2095,7 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2184,7 +2176,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2265,7 +2257,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
@ -2346,7 +2338,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
|
||||
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
|
||||
; GFX10-WGP-NEXT: s_endpgm
|
||||
|
Loading…
x
Reference in New Issue
Block a user