1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
Alexander Timofeev 22ba99e3f8 [AMDGPU] Partial revert for the ba447bae7448435c9986eece0811da1423972fdd
"Divergence driven ISel. Assign register class for cross block values
       according to the divergence."
       which exposed a design flaw leading to several issues that
       need to be solved first.

       This change reverts AMDGPU specific changes and keeps common part
       unaffected.

llvm-svn: 362749
2019-06-06 21:13:02 +00:00

46 lines
1.5 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
; NOTE(review): -amdgpu-dce-in-ra=0 presumably turns off dead-code elimination
; during register allocation so the undefined use survives until the coalescer
; runs; confirm against the AMDGPU backend option definition.
; Don't crash when the use of an undefined value is only detected by the
; register coalescer because it is hidden with subregister insert/extract.
; The triple sets only the amdgcn architecture; vendor and OS fields are empty.
target triple="amdgcn--"
; CHECK-LABEL: foobar:
; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
; CHECK-NEXT: s_mov_b32 s2, -1
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK: BB0_1:
; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: BB0_2:
; CHECK: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
; CHECK-NEXT: s_endpgm
; Kernel reproducer: stores lane 1 of a <4 x float> whose only written lane,
; on either control-flow path, is lane 0. The stored element is therefore
; undefined, and that fact is only visible after following the
; insertelement / phi / extractelement chain.
; Parameters:
;   %a0  - inserted into lane 0 on the path that comes straight from entry.
;   %a1  - inserted into lane 0 on the path that goes through ift.
;   %out - addrspace(1) pointer that receives the extracted float.
define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
entry:
; Only lane 0 is written here; lanes 1-3 stay undef.
%v0 = insertelement <4 x float> undef, float %a0, i32 0
; The branch below is taken when the mbcnt.lo result equals zero.
; NOTE(review): presumably this keeps the condition from being folded to a
; constant so both paths survive; confirm against the intrinsic's docs.
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%cnd = icmp eq i32 %tid, 0
br i1 %cnd, label %ift, label %ife
ift:
; Same shape as %v0 but fed by %a1: lane 0 defined, lanes 1-3 undef.
%v1 = insertelement <4 x float> undef, float %a1, i32 0
br label %ife
ife:
; Merge the two partially defined vectors from both predecessors.
%val = phi <4 x float> [ %v1, %ift ], [ %v0, %entry ]
; Lane 1 is undef in both incoming values, so this extracts an undefined
; element; it is the value that ends up being stored.
%v2 = extractelement <4 x float> %val, i32 1
store float %v2, float addrspace(1)* %out, align 4
ret void
}
; Intrinsic called from @foobar; takes two i32 operands and returns an i32.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
; Attribute group #0 is used by the declaration above and by its call site.
attributes #0 = { nounwind readnone }