mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
b4c888fa24
Summary: VCCZBugHandledSet was used to make sure we don't apply the same workaround more than once to a single cbranch instruction, but it's not necessary because the workaround involves inserting an s_waitcnt instruction, which is enough for subsequent iterations to detect that no further workaround is necessary. Also beef up the test case to check that the workaround was only applied once. I have also manually verified that the test still passes even if I hack the big do-while loop in runOnMachineFunction to run a minimum of five iterations. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69621
50 lines
1.6 KiB
LLVM
50 lines
1.6 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
|
|
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
; Test case: a conditional branch on VCC that follows a load from
; addrspace(4), which the checks below expect to be emitted as s_load_dword.
;
; Runs that enable the VCCZ-BUG prefix (verde and bonaire) must show
; "s_waitcnt lgkmcnt(0)" followed by "s_mov_b64 vcc, vcc" after the compare.
; The negative check that follows applies to every run: once the preceding
; positive check has matched, no (further) "s_mov_b64 vcc, vcc" may appear
; before s_cbranch_vccnz. For verde/bonaire this means the sequence is emitted
; only once; for tonga it means it is not emitted at all.
;
; The label check ties the checks below to this function's section of the
; output.
; GCN-LABEL: {{^}}vccz_workaround:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}}
; VCCZ-BUG: s_waitcnt lgkmcnt(0)
; VCCZ-BUG: s_mov_b64 vcc, vcc
; GCN-NOT: s_mov_b64 vcc, vcc
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) {
entry:
  ; The branch condition depends only on the kernel argument %cond, not on the
  ; value loaded below.
  %cnd = fcmp oeq float 0.0, %cond
  ; Volatile load from addrspace(4); its result is only used by the store in
  ; the %if block.
  %sgpr = load volatile i32, i32 addrspace(4)* %in
  br i1 %cnd, label %if, label %endif

if:
  store i32 %sgpr, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}
|
|
|
|
; Test case: a conditional branch on VCC where the compared value comes from a
; load through an addrspace(1) pointer. The checks expect the compare to take a
; VGPR operand (v_cmp_neq_f32_e32 vcc, 0, v<N>).
;
; All checks here use only the GCN prefix, so they apply to every run
; (verde, bonaire and tonga): between the compare and s_cbranch_vccnz there
; must be neither "s_waitcnt lgkmcnt(0)" nor "s_mov_b64 vcc, vcc".
;
; The label check ties the checks below to this function's section of the
; output.
; GCN-LABEL: {{^}}vccz_noworkaround:
; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
; GCN-NOT: s_waitcnt lgkmcnt(0)
; GCN-NOT: s_mov_b64 vcc, vcc
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
define amdgpu_kernel void @vccz_noworkaround(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
  ; Volatile load from addrspace(1); the loaded value feeds both the compare
  ; and the store in the %if block.
  %vgpr = load volatile float, float addrspace(1)* %in
  %cnd = fcmp oeq float 0.0, %vgpr
  br i1 %cnd, label %if, label %endif

if:
  store float %vgpr, float addrspace(1)* %out
  br label %endif

endif:
  ret void
}
|