mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
c00e9e688a
Currently SI_IF results in a s_and_saveexec_b64 followed by s_xor_b64. The xor is used to extract only the changed bits. In case of a simple if region where the only use of that value is in the SI_END_CF to restore the old exec mask, we can omit the xor and perform an or of the exec mask with the original exec value saved by the s_and_saveexec_b64. Differential Revision: https://reviews.llvm.org/D35861 llvm-svn: 309185
36 lines
1.1 KiB
LLVM
36 lines
1.1 KiB
LLVM
; Codegen test for an i1 phi whose value feeds a conditional branch on AMDGPU.
;
; Both llc invocations below (the default amdgcn subtarget and -mcpu=tonga)
; are verified against the same check prefix, so the same instruction
; sequence is expected from each of them.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Expected shape of the output: one vector register is initialised to 0,
; rewritten to -1 after the first s_and_saveexec_b64, and then compared
; against 0 ahead of the second s_and_saveexec_b64. The [[REG]] capture
; requires all three instructions to use the same register.
; SI-LABEL: {{^}}br_i1_phi:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; SI: s_and_saveexec_b64
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]]
; SI: s_and_saveexec_b64
; SI: s_endpgm
define amdgpu_kernel void @br_i1_phi(i32 %arg) {
bb:
  ; The first branch condition is the low bit of the work-item id in x.
  %tidig = call i32 @llvm.amdgcn.workitem.id.x()
  %cmp = trunc i32 %tidig to i1
  br i1 %cmp, label %bb2, label %bb3

bb2:                                              ; preds = %bb
  ; Empty block: it exists only to give the phi in bb3 a second incoming edge.
  br label %bb3

bb3:                                              ; preds = %bb2, %bb
  ; i1 phi under test: true when control arrived through bb2, false otherwise.
  %tmp = phi i1 [ true, %bb2 ], [ false, %bb ]
  br i1 %tmp, label %bb4, label %bb6

bb4:                                              ; preds = %bb3
  ; NOTE(review): the load is volatile, presumably so that this block is not
  ; optimised away; %tmp5 has no users in this function.
  %val = load volatile i32, i32 addrspace(1)* undef
  %tmp5 = mul i32 %val, %arg
  br label %bb6

bb6:                                              ; preds = %bb4, %bb3
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }