llvm-mirror/test/CodeGen/AMDGPU/i1-copy-phi.ll
Stanislav Mekhanoshin c00e9e688a [AMDGPU] Optimize SI_IF lowering for simple if regions
Currently, SI_IF lowering results in an s_and_saveexec_b64 followed by
an s_xor_b64. The xor is used to extract only the changed bits. In the
case of a simple if region, where the only use of that value is in the
SI_END_CF that restores the old exec mask, we can omit the xor and
instead OR the exec mask with the original exec value saved by the
s_and_saveexec_b64.
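
As an illustration, a rough before/after sketch of the two sequences
described above; the specific registers (s[0:1], vcc) are placeholders
chosen for the example, not taken from actual codegen:

  Before (generic SI_IF lowering):
    s_and_saveexec_b64 s[0:1], vcc           ; save exec, then exec &= vcc
    s_xor_b64          s[0:1], exec, s[0:1]  ; keep only the lanes masked off
    ...                                      ; "then" block
    s_or_b64           exec, exec, s[0:1]    ; SI_END_CF: re-enable those lanes

  After (simple if region, saved value only used by SI_END_CF):
    s_and_saveexec_b64 s[0:1], vcc           ; s[0:1] keeps the full original exec
    ...                                      ; "then" block
    s_or_b64           exec, exec, s[0:1]    ; ORing in the original exec restores it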

Differential Revision: https://reviews.llvm.org/D35861

llvm-svn: 309185
2017-07-26 21:29:15 +00:00


; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}br_i1_phi:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; SI: s_and_saveexec_b64
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]]
; SI: s_and_saveexec_b64
; SI: s_endpgm

define amdgpu_kernel void @br_i1_phi(i32 %arg) {
bb:
  %tidig = call i32 @llvm.amdgcn.workitem.id.x()
  %cmp = trunc i32 %tidig to i1
  br i1 %cmp, label %bb2, label %bb3

bb2:                                              ; preds = %bb
  br label %bb3

bb3:                                              ; preds = %bb2, %bb
  %tmp = phi i1 [ true, %bb2 ], [ false, %bb ]
  br i1 %tmp, label %bb4, label %bb6

bb4:                                              ; preds = %bb3
  %val = load volatile i32, i32 addrspace(1)* undef
  %tmp5 = mul i32 %val, %arg
  br label %bb6

bb6:                                              ; preds = %bb4, %bb3
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
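
For reference, after %s substitution the first RUN line above corresponds
roughly to the following invocation (assuming llc and FileCheck from an
AMDGPU-enabled LLVM build are on PATH):

  llc -march=amdgcn -verify-machineinstrs < test/CodeGen/AMDGPU/i1-copy-phi.ll \
    | FileCheck -check-prefix=SI test/CodeGen/AMDGPU/i1-copy-phi.ll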