mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU: Fix not expanding control flow after some kill blocks
Also stop trying to insert skip blocks at end_cf. This was inserting them at the end of the block, which doesn't make sense: the skip should be inserted at the beginning of the block, right after the end_cf. Just remove this for now, since no tests seem to stress this and I think this can be handled more generally later. Fixes bug 28550. llvm-svn: 275510
This commit is contained in:
parent
fc17b3d32f
commit
30ac294bf5
@ -748,13 +748,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
||||
case AMDGPU::SI_END_CF:
|
||||
if (--Depth == 0 && HaveKill) {
|
||||
HaveKill = false;
|
||||
|
||||
if (skipIfDead(MI, *NextBB)) {
|
||||
NextBB = std::next(BI);
|
||||
BE = MF.end();
|
||||
Next = MBB.end();
|
||||
}
|
||||
// TODO: Insert skip if exec is 0?
|
||||
}
|
||||
|
||||
EndCf(MI);
|
||||
break;
|
||||
|
||||
@ -763,7 +759,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipIfDead(MI, *NextBB)) {
|
||||
NextBB = std::next(BI);
|
||||
BE = MF.end();
|
||||
Next = MBB.end();
|
||||
}
|
||||
} else
|
||||
HaveKill = true;
|
||||
|
@ -336,6 +336,55 @@ bb7: ; preds = %bb4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}if_after_kill_block:
|
||||
; CHECK: ; BB#0:
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK: s_xor_b64
|
||||
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
|
||||
; CHECK: [[BB4]]:
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
; CHECK: image_sample_c
|
||||
|
||||
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
|
||||
; CHECK: s_and_b64 exec, exec,
|
||||
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
|
||||
; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
|
||||
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NOT: branch
|
||||
|
||||
; CHECK: ; BB#3: ; %bb8
|
||||
; CHECK: buffer_store_dword
|
||||
|
||||
; CHECK: [[END]]:
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
; CHECK: s_endpgm
|
||||
; Pixel-shader test input: a kill inside a conditionally executed block (%bb3),
; followed in %bb4 by a second, independent conditional branch. This is the
; "control flow after a kill block" shape named in the commit title; the CHECK
; lines above expect the second branch to still be expanded into
; s_and_saveexec / s_xor / mask branch.
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x i32> %arg2) #0 {
|
||||
bb:
|
||||
; Enter the kill block only when %arg1 is less than 0.0 or unordered (ult).
%tmp = fcmp ult float %arg1, 0.000000e+00
|
||||
br i1 %tmp, label %bb3, label %bb4
|
||||
|
||||
bb3: ; preds = %bb
|
||||
; The kill is the only work in this block; control then rejoins at %bb4.
call void @llvm.AMDGPU.kill(float %arg)
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb3, %bb
|
||||
; Join point after the kill block: sample, then branch on component 0 of the
; result being not-equal-to (or unordered with) 0.0.
%tmp5 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tmp6 = extractelement <4 x float> %tmp5, i32 0
|
||||
%tmp7 = fcmp une float %tmp6, 0.000000e+00
|
||||
br i1 %tmp7, label %bb8, label %bb9
|
||||
|
||||
bb8: ; preds = %bb4
|
||||
; Volatile store gives this path an observable side effect (matched by the
; buffer_store_dword CHECK line).
store volatile i32 9, i32 addrspace(1)* undef
|
||||
ret void
|
||||
|
||||
bb9: ; preds = %bb4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float) #0
|
||||
declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user