1
0
Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 20:51:52 +01:00)

[AMDGPU] Fix emitIfBreak CF lowering: use a temporary register to make the register coalescer's life easier.

Differential revision: https://reviews.llvm.org/D70405
This commit is contained in:
Author: vpykhtin — committed 2019-11-18 20:06:48 +03:00
parent 282514a1ea
commit 5fa4ad5707
8 changed files with 45 additions and 50 deletions

View File

@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
if (!SkipAnding) {
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
Register AndReg = MRI->createVirtualRegister(BoolRC);
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(Dst)
.addReg(AndReg)
.add(MI.getOperand(2));
if (LIS)
LIS->createAndComputeVirtRegInterval(AndReg);
} else
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))

View File

@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
; CHECK-NEXT: s_branch BB0_3
; CHECK-NEXT: BB0_1: ; %Flow1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_mov_b64 s[10:11], 0
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: BB0_2: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB0_6
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
; CHECK-NEXT: s_cmp_lt_u32 s0, 32
; CHECK-NEXT: s_mov_b64 s[10:11], -1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_cbranch_scc0 BB0_2
; CHECK-NEXT: ; %bb.4: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
@ -53,9 +52,9 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: s_branch BB0_1
; CHECK-NEXT: BB0_6: ; %Flow2
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
; CHECK-NEXT: ; mask branch BB0_8
; CHECK-NEXT: BB0_7: ; %if1
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT: s_endpgm
; this is the divergent branch with the condition not marked as divergent
start:
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)

View File

@ -3,11 +3,10 @@
; SI-LABEL: {{^}}i1_copy_from_loop:
;
; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1
; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
; SI: ; %Flow
; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec
; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]]
; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]]
; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]]
; SI: ; %for.body

View File

@ -40,10 +40,9 @@
; GCN: [[FLOW]]: ; %Flow
; GCN: ; in Loop: Header=BB0_1 Depth=1
; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]]
; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]]
; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]]
; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]]
; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
; GCN: ; %bb.4: ; %bb9

View File

@ -25,22 +25,20 @@
; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]]
; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]]
; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]]
; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}}
; GCN: ; %Flow
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]]
; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]]
; GCN: s_cbranch_execz [[FLOW2]]
; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP
; OPT: llvm.amdgcn.end.cf
; GCN-LABEL: {{^}}multi_if_break_loop:
; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4
; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]]
; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]]
; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]]
; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}}
; GCN: s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]]
; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec
; GCN: s_andn2_b64 exec, exec, [[MASK1]]
; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]]
; GCN: ; %bb1{{$}}
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]],
; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]]
; GCN: ; %LeafBlock1
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]]
@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP
; GCN: s_branch [[LOOP]]
; GCN: [[LOOP_EXIT]]: ; %Flow6
; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]]
; GCN: s_or_b64 exec, exec, [[SAVED_MASK]]
define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
bb:

View File

@ -37,9 +37,8 @@ ENDIF:
; SI: ; %endif
; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]]
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]]
; SI: s_andn2_b64 exec, exec, [[LEFT]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm

View File

@ -223,9 +223,8 @@ exit:
; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]],
; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]]
; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]]
; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]]
; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]]
; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]:

View File

@ -243,14 +243,12 @@ bb13:
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]]
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]]
; GCN: BB{{.*}}: ; %Flow
; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]]
; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]]
; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]]
; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]]
; GFX1032: s_mov_b32 [[ACC]], [[MASK0]]
; GFX1064: s_mov_b64 [[ACC]], [[MASK0]]
; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]]
; GFX1064: s_andn2_b64 exec, exec, [[MASK0]]
; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]]
; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]]
; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]]
; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]]
; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]]
; GFX1064: s_andn2_b64 exec, exec, [[ACC]]
; GCN: s_cbranch_execz
; GCN: BB{{.*}}:
; GCN: s_load_dword [[LOAD:s[0-9]+]]