1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll
Jay Foad 8e997b3d9b [AMDGPU] Fix and extend vccz workarounds
We have workarounds for two different cases where vccz can get out of
sync with the value in vcc. This fixes them in two ways:

1. Fix the case where the def of vcc was in a previous basic block, by
pessimistically assuming that vccz might be incorrect at a basic block
boundary.

2. Fix the handling of pre-existing waitcnt instructions by calling
generateWaitcntInstBefore before examining ScoreBrackets to determine
whether there's an outstanding smem read operation.

Differential Revision: https://reviews.llvm.org/D91636
2020-11-18 15:26:06 +00:00

37 lines
1.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
; Kernel with a conditional entry into a loop that has no exit.
;
; Control flow of the IR below:
;   %bb   - branches to %bb9 when both %arg and %arg1 are non-zero,
;           otherwise to %bb11.
;   %bb9  - falls straight through to %bb10.
;   %bb10 - unconditionally branches back to itself (no exit edge).
;   %bb11 - returns.
;
; The autogenerated assertions expect the inverted test in the ISA: each
; argument is compared for equality with zero, the two results are OR'ed,
; and a taken branch skips the loop and goes to the block that ends the
; program.
;
; NOTE(review): judging by the file name this looks like a regression test for
; an assertion in the SI control-flow annotation of loops - confirm against the
; commit history.
; NOTE(review): the "s_mov_b64 vcc, vcc" expected inside the loop looks like
; the vccz workaround emitted ahead of the vccz branch - confirm.
define amdgpu_kernel void @test(i32 %arg, i32 %arg1) {
; CHECK-LABEL: test:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u32_e64 s[2:3], s0, 0
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz BB0_3
; CHECK-NEXT: ; %bb.1: ; %bb9
; CHECK-NEXT: s_and_b64 vcc, exec, 0
; CHECK-NEXT: BB0_2: ; %bb10
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccz BB0_2
; CHECK-NEXT: BB0_3: ; %DummyReturnBlock
; CHECK-NEXT: s_endpgm
; Entry: %tmp8 is true only when both arguments are non-zero.
bb:
%tmp = icmp ne i32 %arg, 0
%tmp7 = icmp ne i32 %arg1, 0
%tmp8 = and i1 %tmp, %tmp7
br i1 %tmp8, label %bb9, label %bb11
; Loop preheader: nothing but the jump into the loop.
bb9: ; preds = %bb
br label %bb10
; Self-loop: the only successor of %bb10 is %bb10 itself.
bb10: ; preds = %bb10, %bb9
br label %bb10
; Exit taken when at least one argument is zero.
bb11: ; preds = %bb
ret void
}