mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
8e997b3d9b
We have workarounds for two different cases where vccz can get out of sync with the value in vcc. This fixes them in two ways: 1. Fix the case where the def of vcc was in a previous basic block, by pessimistically assuming that vccz might be incorrect at a basic block boundary. 2. Fix the handling of pre-existing waitcnt instructions by calling generateWaitcntInstBefore before examining ScoreBrackets to determine whether there's an outstanding smem read operation. Differential Revision: https://reviews.llvm.org/D91636
37 lines
1.2 KiB
LLVM
37 lines
1.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s

; Regression test for the vccz workaround when the branch condition in vcc is
; produced in a different basic block than the branch that consumes it.
;
; Shape of the input: %bb branches on (%arg != 0 && %arg1 != 0) to either
; %bb9 or the returning block %bb11; %bb9 falls into %bb10, which is an
; unconditional self-loop.
;
; What the assertions pin down: vcc is written in %bb9, and the loop header
; %bb10 re-writes it with "s_mov_b64 vcc, vcc" immediately before the
; s_cbranch_vccz that closes the loop.
; NOTE(review): the accompanying change description says vccz is
; pessimistically treated as possibly stale at a basic block boundary; the
; s_mov_b64 is presumably that workaround - confirm against SIInsertWaitcnts.

define amdgpu_kernel void @test(i32 %arg, i32 %arg1) {
; CHECK-LABEL: test:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u32_e64 s[2:3], s0, 0
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz BB0_3
; CHECK-NEXT: ; %bb.1: ; %bb9
; CHECK-NEXT: s_and_b64 vcc, exec, 0
; CHECK-NEXT: BB0_2: ; %bb10
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccz BB0_2
; CHECK-NEXT: BB0_3: ; %DummyReturnBlock
; CHECK-NEXT: s_endpgm
bb:
  ; Enter the loop only when both kernel arguments are non-zero.
  %tmp = icmp ne i32 %arg, 0
  %tmp7 = icmp ne i32 %arg1, 0
  %tmp8 = and i1 %tmp, %tmp7
  br i1 %tmp8, label %bb9, label %bb11

bb9:                                              ; preds = %bb
  br label %bb10

; Self-loop with no exit edge in the IR.
bb10:                                             ; preds = %bb10, %bb9
  br label %bb10

bb11:                                             ; preds = %bb
  ret void
}