mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
a70016c8d5
Add the scratch wave offset to the scratch buffer descriptor (SRSrc) in the entry function prologue. This allows us to removes the scratch wave offset register from the calling convention ABI. As part of this change, allow the use of an inline constant zero for the SOffset of MUBUF instructions accessing the stack in entry functions when a frame pointer is not requested/required. Entry functions with calls still need to set up the calling convention ABI stack pointer register, and reference it in order to address arguments of called functions. The ABI stack pointer register remains unswizzled, but is now wave-relative instead of queue-relative. Non-entry functions also use an inline constant zero SOffset for wave-relative scratch access, but continue to use the stack and frame pointers as before. When the stack or frame pointer is converted to a swizzled offset it is now scaled directly, as the scratch wave offset no longer needs to be subtracted first. Update llvm/docs/AMDGPUUsage.rst to reflect these changes to the calling convention. Tags: #llvm Differential Revision: https://reviews.llvm.org/D75138
755 lines
29 KiB
LLVM
755 lines
29 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
|
|
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
; GCN-LABEL: {{^}}system_one_as_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_one_as_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_one_as_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_one_as_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_one_as_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_one_as_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_one_as_release:
|
|
; GCN: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_one_as_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_one_as_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("one-as") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_one_as_seq_cst:
|
|
; GCN: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_one_as_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_one_as_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_one_as_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_one_as_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_one_as_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_one_as_release:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_one_as_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_one_as_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_one_as_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_one_as_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_one_as_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_one_as_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_one_as_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_one_as_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_one_as_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_one_as_release:
|
|
; GCN: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_one_as_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_one_as_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
|
|
; GCN: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_one_as_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_one_as_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_one_as_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_one_as_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_one_as_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_one_as_release:
|
|
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_one_as_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_one_as_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
|
|
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_one_as_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_one_as_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_one_as_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_one_as_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_one_as_release:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_one_as_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_one_as_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_one_as_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_private_0:
|
|
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen glc slc{{$}}
|
|
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_private_0
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_private_0(
|
|
i32* %in, i32 addrspace(5)* %out) {
|
|
entry:
|
|
%val = load i32, i32* %in, align 4
|
|
store i32 %val, i32 addrspace(5)* %out, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_private_1:
|
|
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen glc slc{{$}}
|
|
; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_private_1
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_private_1(
|
|
i32* %in, i32 addrspace(5)* %out) {
|
|
entry:
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%val = load i32, i32* %in, align 4
|
|
%out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid
|
|
store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_global_0:
|
|
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
|
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
|
|
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_global_0
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_global_0(
|
|
i32* %in, i32 addrspace(1)* %out) {
|
|
entry:
|
|
%val = load i32, i32* %in, align 4
|
|
store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_global_1:
|
|
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
|
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
|
|
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_global_1
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_global_1(
|
|
i32* %in, i32 addrspace(1)* %out) {
|
|
entry:
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%val = load i32, i32* %in, align 4
|
|
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
|
|
store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_local_0:
|
|
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_local_0
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_local_0(
|
|
i32* %in, i32 addrspace(3)* %out) {
|
|
entry:
|
|
%val = load i32, i32* %in, align 4
|
|
store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_local_1:
|
|
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_local_1
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_local_1(
|
|
i32* %in, i32 addrspace(3)* %out) {
|
|
entry:
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%val = load i32, i32* %in, align 4
|
|
%out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
|
|
store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_flat_0:
|
|
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
|
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_flat_0
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_flat_0(
|
|
i32* %in, i32* %out) {
|
|
entry:
|
|
%val = load i32, i32* %in, align 4
|
|
store i32 %val, i32* %out, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nontemporal_flat_1:
|
|
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
|
|
; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
|
|
; GFX10: .amdhsa_kernel nontemporal_flat_1
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @nontemporal_flat_1(
|
|
i32* %in, i32* %out) {
|
|
entry:
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%val = load i32, i32* %in, align 4
|
|
%out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
|
|
store i32 %val, i32* %out.gep, !nontemporal !0
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_release:
|
|
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}system_seq_cst:
|
|
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel system_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @system_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_release:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}singlethread_seq_cst:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel singlethread_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @singlethread_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_release:
|
|
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}agent_seq_cst:
|
|
; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt lgkmcnt(0){{$}}
|
|
; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel agent_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @agent_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_release:
|
|
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}workgroup_seq_cst:
|
|
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
|
; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel workgroup_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @workgroup_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_unordered:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_unordered
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_unordered(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_monotonic:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_monotonic
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_monotonic(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_release:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_release
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_release(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}wavefront_seq_cst:
|
|
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
|
|
; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
|
|
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
|
; GFX10: .amdhsa_kernel wavefront_seq_cst
|
|
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10CU: .amdhsa_workgroup_processor_mode 0
|
|
; GFX10-NOT: .amdhsa_memory_ordered 0
|
|
define amdgpu_kernel void @wavefront_seq_cst(
|
|
i32 %in, i32* %out) {
|
|
entry:
|
|
store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
!0 = !{i32 1}
|