mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Fix emitIfBreak CF lowering: use a temp reg to make the register coalescer's life easier.
Differential revision: https://reviews.llvm.org/D70405
This commit is contained in:
parent
282514a1ea
commit
5fa4ad5707
@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
|
||||
// exit" mask.
|
||||
MachineInstr *And = nullptr, *Or = nullptr;
|
||||
if (!SkipAnding) {
|
||||
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
|
||||
Register AndReg = MRI->createVirtualRegister(BoolRC);
|
||||
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
|
||||
.addReg(Exec)
|
||||
.add(MI.getOperand(1));
|
||||
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
|
||||
.addReg(Dst)
|
||||
.addReg(AndReg)
|
||||
.add(MI.getOperand(2));
|
||||
if (LIS)
|
||||
LIS->createAndComputeVirtRegInterval(AndReg);
|
||||
} else
|
||||
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
|
||||
.add(MI.getOperand(1))
|
||||
|
@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
|
||||
; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], 0
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; CHECK-NEXT: s_branch BB0_3
|
||||
; CHECK-NEXT: BB0_1: ; %Flow1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], 0
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
|
||||
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
||||
; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec
|
||||
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
|
||||
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
|
||||
; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]
|
||||
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_6
|
||||
; CHECK-NEXT: BB0_3: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
|
||||
; CHECK-NEXT: s_cmp_lt_u32 s0, 32
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], -1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB0_2
|
||||
; CHECK-NEXT: ; %bb.4: ; %endif1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
@ -53,9 +52,9 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
|
||||
; CHECK-NEXT: s_branch BB0_1
|
||||
; CHECK-NEXT: BB0_6: ; %Flow2
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
|
||||
; CHECK-NEXT: ; mask branch BB0_8
|
||||
; CHECK-NEXT: BB0_7: ; %if1
|
||||
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
|
||||
@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
; this is the divergent branch with the condition not marked as divergent
|
||||
start:
|
||||
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
|
||||
|
@ -3,11 +3,10 @@
|
||||
|
||||
; SI-LABEL: {{^}}i1_copy_from_loop:
|
||||
;
|
||||
; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1
|
||||
; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
|
||||
; SI: ; %Flow
|
||||
; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
|
||||
; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]]
|
||||
; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]]
|
||||
; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]]
|
||||
|
||||
; SI: ; %for.body
|
||||
|
@ -40,10 +40,9 @@
|
||||
|
||||
; GCN: [[FLOW]]: ; %Flow
|
||||
; GCN: ; in Loop: Header=BB0_1 Depth=1
|
||||
; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]]
|
||||
; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
|
||||
; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]]
|
||||
; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
|
||||
; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]]
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]]
|
||||
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
|
||||
|
||||
; GCN: ; %bb.4: ; %bb9
|
||||
|
@ -25,22 +25,20 @@
|
||||
; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
|
||||
; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
|
||||
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
|
||||
; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]]
|
||||
; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
|
||||
; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}}
|
||||
|
||||
; GCN: ; %Flow
|
||||
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
|
||||
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
|
||||
; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]]
|
||||
; GCN: s_cbranch_execz [[FLOW2]]
|
||||
|
||||
; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
|
||||
@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP
|
||||
; OPT: llvm.amdgcn.end.cf
|
||||
|
||||
; GCN-LABEL: {{^}}multi_if_break_loop:
|
||||
; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4
|
||||
; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]]
|
||||
; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]]
|
||||
; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]]
|
||||
; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec
|
||||
; GCN: s_andn2_b64 exec, exec, [[MASK1]]
|
||||
; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: ; %bb1{{$}}
|
||||
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]],
|
||||
; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]]
|
||||
|
||||
; GCN: ; %LeafBlock1
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]]
|
||||
@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP
|
||||
; GCN: s_branch [[LOOP]]
|
||||
|
||||
; GCN: [[LOOP_EXIT]]: ; %Flow6
|
||||
; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]]
|
||||
; GCN: s_or_b64 exec, exec, [[SAVED_MASK]]
|
||||
|
||||
define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
||||
bb:
|
||||
|
@ -37,9 +37,8 @@ ENDIF:
|
||||
; SI: ; %endif
|
||||
|
||||
; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
|
||||
; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
|
||||
; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
|
||||
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]]
|
||||
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]]
|
||||
; SI: s_andn2_b64 exec, exec, [[LEFT]]
|
||||
; SI: s_cbranch_execnz [[LOOP_LABEL]]
|
||||
; SI: s_endpgm
|
||||
|
@ -223,9 +223,8 @@ exit:
|
||||
; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
|
||||
; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
|
||||
; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]],
|
||||
; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]]
|
||||
; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]]
|
||||
; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
|
||||
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
|
||||
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
|
@ -243,14 +243,12 @@ bb13:
|
||||
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]]
|
||||
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]]
|
||||
; GCN: BB{{.*}}: ; %Flow
|
||||
; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]]
|
||||
; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]]
|
||||
; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]]
|
||||
; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]]
|
||||
; GFX1032: s_mov_b32 [[ACC]], [[MASK0]]
|
||||
; GFX1064: s_mov_b64 [[ACC]], [[MASK0]]
|
||||
; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]]
|
||||
; GFX1064: s_andn2_b64 exec, exec, [[MASK0]]
|
||||
; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]]
|
||||
; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]]
|
||||
; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]]
|
||||
; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]]
|
||||
; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]]
|
||||
; GFX1064: s_andn2_b64 exec, exec, [[ACC]]
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: s_load_dword [[LOAD:s[0-9]+]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user