1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

AMDGPU: Fix not expanding control flow after some kill blocks

Also stop trying to insert skip blocks at end_cf. This
was inserting them at the end of the block which doesn't make
sense. The skip should be inserted at the beginning of the block
right after the end cf. Just remove this for now since no tests
seem to stress this and I think this can be handled more generally
later.

Fixes bug 28550

llvm-svn: 275510
This commit is contained in:
Matt Arsenault 2016-07-15 00:58:15 +00:00
parent fc17b3d32f
commit 30ac294bf5
2 changed files with 51 additions and 7 deletions

View File

@ -748,13 +748,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_END_CF:
if (--Depth == 0 && HaveKill) {
HaveKill = false;
if (skipIfDead(MI, *NextBB)) {
NextBB = std::next(BI);
BE = MF.end();
Next = MBB.end();
}
// TODO: Insert skip if exec is 0?
}
EndCf(MI);
break;
@ -763,7 +759,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
if (skipIfDead(MI, *NextBB)) {
NextBB = std::next(BI);
BE = MF.end();
Next = MBB.end();
}
} else
HaveKill = true;

View File

@ -336,6 +336,55 @@ bb7: ; preds = %bb4
ret void
}
; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; BB#0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK: [[BB4]]:
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_b64 exec, exec,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch
; CHECK: ; BB#3: ; %bb8
; CHECK: buffer_store_dword
; CHECK: [[END]]:
; CHECK: s_or_b64 exec, exec
; CHECK: s_endpgm
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x i32> %arg2) #0 {
bb:
%tmp = fcmp ult float %arg1, 0.000000e+00
br i1 %tmp, label %bb3, label %bb4
bb3: ; preds = %bb
call void @llvm.AMDGPU.kill(float %arg)
br label %bb4
bb4: ; preds = %bb3, %bb
%tmp5 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp6 = extractelement <4 x float> %tmp5, i32 0
%tmp7 = fcmp une float %tmp6, 0.000000e+00
br i1 %tmp7, label %bb8, label %bb9
bb8: ; preds = %bb9, %bb4
store volatile i32 9, i32 addrspace(1)* undef
ret void
bb9: ; preds = %bb4
ret void
}
declare void @llvm.AMDGPU.kill(float) #0
declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }