1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-loop-phi.mir
Matt Arsenault ec8b110c78 AMDGPU: Check for other defs when folding conditions into s_andn2_b64
We can't fold the masked compare value through the select if the
select condition is re-defined after the and instruction. Fixes a
verifier error caused by trying to use the outgoing value defined
in the block.

I'm not sure why this pass is bothering to handle physregs. It's
making this more complex and forces extra liveness computation.
2020-07-28 16:36:23 -04:00

202 lines
7.1 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra,si-optimize-exec-masking-pre-ra -o - %s | FileCheck %s
# FIXME: Second run of the pass is a workaround for a bug in
# -run-pass. The verifier doesn't detect broken LiveIntervals, see bug
# 46873
# Cannot fold this without moving the def of %7 after the and.
---
name: no_fold_andn2_select_condition_live_out_phi
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
  ; CHECK:   undef %1.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   S_BRANCH %bb.2
  ; CHECK: bb.1:
  ; CHECK:   S_ENDPGM 0
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
  ; CHECK:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; CHECK:   %1.sub1:vreg_64 = COPY %1.sub0
  ; CHECK:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store 8, addrspace 3)
  ; CHECK:   ATOMIC_FENCE 4, 2
  ; CHECK:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
  ; CHECK:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
  ; CHECK:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    %7:sreg_64_xexec = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
    ATOMIC_FENCE 4, 2
    ; %7 is re-defined here, BEFORE the S_AND_B64 below, so the pass must not
    ; fold the compare into an S_ANDN2_B64 of the old %7 value.
    %7:sreg_64_xexec = S_MOV_B64 0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...
# It's OK to fold this, since the phi def is after the andn2 insert point.
---
name: fold_andn2_select_condition_live_out_phi_reorder
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_reorder
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
  ; CHECK:   undef %1.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   S_BRANCH %bb.2
  ; CHECK: bb.1:
  ; CHECK:   S_ENDPGM 0
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK:   %1.sub1:vreg_64 = COPY %1.sub0
  ; CHECK:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store 8, addrspace 3)
  ; CHECK:   ATOMIC_FENCE 4, 2
  ; CHECK:   $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
  ; CHECK:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
  ; CHECK:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    %7:sreg_64_xexec = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
    ATOMIC_FENCE 4, 2
    ; The re-def of %7 comes AFTER the S_AND_B64, so the fold is legal.
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    %7:sreg_64_xexec = S_MOV_B64 0
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...
---
# Physreg variant of the first test: $sgpr4_sgpr5 is re-defined before the
# and, so the fold must be rejected.
name: no_fold_andn2_select_condition_live_out_phi_physreg
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi_physreg
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   $sgpr4_sgpr5 = S_MOV_B64 -1
  ; CHECK:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   S_BRANCH %bb.2
  ; CHECK: bb.1:
  ; CHECK:   S_ENDPGM 0
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK:   liveins: $sgpr4_sgpr5
  ; CHECK:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
  ; CHECK:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; CHECK:   %0.sub1:vreg_64 = COPY %0.sub0
  ; CHECK:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store 8, addrspace 3)
  ; CHECK:   ATOMIC_FENCE 4, 2
  ; CHECK:   $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
  ; CHECK:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    $sgpr4_sgpr5 = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2
    liveins: $sgpr4_sgpr5

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
    ATOMIC_FENCE 4, 2
    ; Re-def of the physreg condition before the and blocks the fold.
    $sgpr4_sgpr5 = S_MOV_B64 0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...
---
# Physreg variant of the reorder test: the re-def of $sgpr4_sgpr5 is after
# the and, so the fold is OK.
name: fold_andn2_select_condition_live_out_phi_physreg_reorder
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_physreg_reorder
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   $sgpr4_sgpr5 = S_MOV_B64 -1
  ; CHECK:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   S_BRANCH %bb.2
  ; CHECK: bb.1:
  ; CHECK:   S_ENDPGM 0
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK:   liveins: $sgpr4_sgpr5
  ; CHECK:   %0.sub1:vreg_64 = COPY %0.sub0
  ; CHECK:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store 8, addrspace 3)
  ; CHECK:   ATOMIC_FENCE 4, 2
  ; CHECK:   $vcc = S_ANDN2_B64 $exec, $sgpr4_sgpr5, implicit-def dead $scc
  ; CHECK:   $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    $sgpr4_sgpr5 = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2
    liveins: $sgpr4_sgpr5

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
    ATOMIC_FENCE 4, 2
    ; Re-def of the physreg condition comes after the and; fold is legal.
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    $sgpr4_sgpr5 = S_MOV_B64 0
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...