mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
d17e4a4a31
If a workgroup size is known to be no greater than the wavefront size, the s_barrier instruction is not needed, since all threads are guaranteed to reach the same point at the same time. This is the same optimization that was implemented for SelectionDAG in D31731. Differential Revision: https://reviews.llvm.org/D86609
32 lines
867 B
LLVM
32 lines
867 B
LLVM
; Two llc invocations for the amdgcn target, one without and one with
; -global-isel; both outputs are verified against the same set of FileCheck
; expectations in this file.
; RUN: llc -march=amdgcn < %s | FileCheck %s
|
|
; RUN: llc -march=amdgcn < %s -global-isel | FileCheck %s
|
|
|
|
; Kernel with no work-group-size attribute attached. The expectation below
; requires an s_barrier instruction to appear in the generated code for the
; single barrier intrinsic call in the body.
; CHECK-LABEL: {{^}}unknown_wgs:
|
|
; CHECK: s_barrier
|
|
define amdgpu_kernel void @unknown_wgs() {
|
|
tail call void @llvm.amdgcn.s.barrier() #0
|
|
ret void
|
|
}
|
|
|
|
; Kernel carrying attribute group #1 ("amdgpu-flat-work-group-size"="32,128").
; The expectation below still requires an s_barrier instruction in the output.
; NOTE(review): presumably the upper bound of 128 can exceed the wavefront size
; of the default subtarget, so the barrier must be kept -- confirm.
; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
|
|
; CHECK: s_barrier
|
|
define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
|
|
tail call void @llvm.amdgcn.s.barrier() #0
|
|
ret void
|
|
}
|
|
|
|
; Kernel carrying attribute group #2 ("amdgpu-flat-work-group-size"="32,64").
; The expectations below require a line containing ':' to be followed
; immediately by a "; wave barrier" line and then by s_endpgm, so no s_barrier
; instruction may be emitted between them.
; NOTE(review): presumably the ':' match is the end of the function's entry
; label, and 64 is the wavefront size of the default subtarget -- confirm.
; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
|
|
; CHECK: :
|
|
; CHECK-NEXT: ; wave barrier
|
|
; CHECK-NEXT: s_endpgm
|
|
define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
|
|
tail call void @llvm.amdgcn.s.barrier() #0
|
|
ret void
|
|
}
|
|
|
|
; Barrier intrinsic called by every kernel in this file. Attribute group #0
; (convergent nounwind) is applied both to this declaration and to each call
; site.
declare void @llvm.amdgcn.s.barrier() #0
|
|
|
|
attributes #0 = { convergent nounwind }
|
|
; Groups #1 and #2 are attached to the kernels and differ only in the second
; value of "amdgpu-flat-work-group-size" (128 versus 64).
; NOTE(review): the two values are presumably the minimum and maximum flat
; work-group size -- confirm against the AMDGPU backend documentation.
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
|
|
attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
|