1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00
llvm-mirror/test/CodeGen/AMDGPU/i1-copy-phi.ll
Michael Liao cbc2bb2b58 [SelectionDAG] Enhance the simplification of copyto from implicit-def.
Summary:
- The current implementation simplifies the case where the source of
  `copyto` is `implicit-def`ed. However, it only works when that
  `implicit-def` is single-used since it detects that from
  `implicit-def` and cannot determine which destination vreg should be
  used if there are multiple uses.
- This patch changes that detection when `copyto` is being emitted. If
  that `copyto`'s source is defined from `implicit-def`, it simplifies
  it. Hence, it works even that `implicit-def` is multi-used.
- Except it simplifies the internal IR, it won't improve the quality of
  code generation. However, it helps to detect 'implicit-def` in a
  straight-forward manner in some passes, such as `si-i1-copies`. A test
  case is added.

Reviewers: sunfish, nhaehnle

Subscribers: jvesely, hiraditya, asbirlea, llvm-commits, yaxunl

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62342

llvm-svn: 361777
2019-05-27 18:26:29 +00:00

59 lines
1.7 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}br_i1_phi:
; SI: ; %bb
; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], 0
; SI: ; %bb2
; SI: s_mov_b64 [[TMP]], exec
; SI: ; %bb3
; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]]
define amdgpu_kernel void @br_i1_phi(i32 %arg) {
bb:
%tidig = call i32 @llvm.amdgcn.workitem.id.x()
%cmp = trunc i32 %tidig to i1
br i1 %cmp, label %bb2, label %bb3
bb2: ; preds = %bb
br label %bb3
bb3: ; preds = %bb2, %bb
%tmp = phi i1 [ true, %bb2 ], [ false, %bb ]
br i1 %tmp, label %bb4, label %bb6
bb4: ; preds = %bb3
%val = load volatile i32, i32 addrspace(1)* undef
%tmp5 = mul i32 %val, %arg
br label %bb6
bb6: ; preds = %bb4, %bb3
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
; Make sure this won't crash.
; SI-LABEL: {{^}}vcopy_i1_undef
; SI: v_cndmask_b32_e64
; SI: v_cndmask_b32_e64
define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) {
entry:
br i1 undef, label %exit, label %false
false:
%x = load <2 x float>, <2 x float> addrspace(1)* %p
%cmp = fcmp one <2 x float> %x, zeroinitializer
br label %exit
exit:
%c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ]
%ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0>
ret <2 x float> %ret
}