1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-21 18:22:53 +01:00

AMDGPU: Treat IMPLICIT_DEF like a constant lanemask source

This is partially a workaround. SILowerI1Copies does not understand
unstructured loops. This would result in inserting instructions to
merge a mask register in the same block where it was defined in an
unstructured loop.
This commit is contained in:
Matt Arsenault 2021-03-09 16:48:49 -05:00 committed by Matt Arsenault
parent bb4e957ebf
commit 8979bda8e8
2 changed files with 181 additions and 2 deletions

View File

@ -598,6 +598,11 @@ void SILowerI1Copies::lowerPhis() {
MachineBasicBlock *PostDomBound =
PDT->findNearestCommonDominator(DomBlocks);
// FIXME: This fails to find irreducible cycles. If we have a def (other
// than a constant) in a pair of blocks that end up looping back to each
// other, it will be mishandled. Due to structurization this shouldn't occur
// in practice.
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
SSAUpdater.Initialize(DstReg);
@ -732,6 +737,9 @@ bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
const MachineInstr *MI;
for (;;) {
MI = MRI->getUniqueVRegDef(Reg);
if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
return true;
if (MI->getOpcode() != AMDGPU::COPY)
break;
@ -808,9 +816,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DstReg,
unsigned PrevReg, unsigned CurReg) {
bool PrevVal;
bool PrevVal = false;
bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
bool CurVal;
bool CurVal = false;
bool CurConstant = isConstantLaneMask(CurReg, CurVal);
if (PrevConstant && CurConstant) {

View File

@ -0,0 +1,171 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s
# %bb.1 and %bb.3 loop back to each other, and thus neither dominates
# the other.
# When the phi in %bb.3 was handled, the pass attempted to insert
# instructions in %bb.1 to handle this def, but ended up inserting mask
# management instructions before the def of %34. This is avoided by
# treating IMPLICIT_DEF specially, like a constant.
---
name: recursive_vreg_1_phi
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
; CHECK-LABEL: name: recursive_vreg_1_phi
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 20
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 killed [[DEF3]], killed [[DEF1]], implicit $exec
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK: [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 31, [[COPY2]], implicit $exec
; CHECK: [[DEF5:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_ASHRREV_I32_e32_]], %subreg.sub1
; CHECK: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY killed [[S_MOV_B32_2]]
; CHECK: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec
; CHECK: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[V_LSHL_B64_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
; CHECK: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 68
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_4]]
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 432
; CHECK: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 killed [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_5]], [[REG_SEQUENCE1]], 0, implicit $exec
; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
; CHECK: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
; CHECK: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
; CHECK: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
; CHECK: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
; CHECK: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; CHECK: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK: S_CBRANCH_SCC1 %bb.3, implicit $scc
; CHECK: S_BRANCH %bb.2
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
; CHECK: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]]
; CHECK: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY7]], implicit $exec
; CHECK: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[V_LSHR_B32_e32_]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
; CHECK: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; CHECK: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
; CHECK: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY9]], implicit-def dead $scc
; CHECK: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK: [[DEF9:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[COPY6]], $exec, implicit-def $scc
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_OR_B64_]], $exec, implicit-def $scc
; CHECK: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x00000000), %bb.1(0x80000000)
; CHECK: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
; CHECK: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
; CHECK: [[PHI5:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.1, [[S_MOV_B64_2]], %bb.2
; CHECK: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI5]], implicit $exec
; CHECK: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; CHECK: [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK: V_CMP_NE_U32_e32 killed [[S_MOV_B32_9]], [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
; CHECK: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
; CHECK: [[S_ANDN2_B64_2:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI4]], $exec, implicit-def $scc
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
; CHECK: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_2]], [[S_AND_B64_2]], implicit-def $scc
; CHECK: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; CHECK: S_BRANCH %bb.4
; CHECK: bb.4:
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
%0:sreg_64 = IMPLICIT_DEF
%1:sreg_32 = S_MOV_B32 20
%2:vgpr_32 = COPY %1
%3:vgpr_32 = IMPLICIT_DEF
%4:sreg_32 = S_MOV_B32 10
%5:vgpr_32 = COPY %4
%6:vgpr_32 = IMPLICIT_DEF
%7:vgpr_32 = IMPLICIT_DEF
%8:vgpr_32 = V_OR_B32_e32 killed %7, killed %3, implicit $exec
%9:vgpr_32 = COPY $vgpr0
%10:sreg_32 = IMPLICIT_DEF
%11:vgpr_32 = V_ASHRREV_I32_e32 31, %9, implicit $exec
%12:sreg_32_xm0 = IMPLICIT_DEF
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
%14:sreg_32 = S_MOV_B32 2
%15:sgpr_32 = COPY killed %14
%16:vreg_64 = V_LSHL_B64_e64 killed %13, %15, implicit $exec
%17:vgpr_32 = FLAT_LOAD_DWORD killed %16, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
%18:sreg_32 = S_MOV_B32 0
%19:sreg_32 = S_MOV_B32 68
%20:vgpr_32 = COPY killed %19
%21:vgpr_32 = COPY %18
%22:vreg_64 = REG_SEQUENCE killed %20, %subreg.sub0, %21, %subreg.sub1
%23:sreg_32 = S_MOV_B32 432
%24:vreg_64, %25:sreg_64 = V_MAD_I64_I32_e64 killed %17, killed %23, %22, 0, implicit $exec
%26:sreg_64 = S_MOV_B64 0
%27:vreg_1 = COPY %26, implicit $exec
bb.1:
successors: %bb.2, %bb.3
%28:sreg_32 = PHI %18, %bb.0, %29, %bb.3
%30:vreg_1 = PHI %27, %bb.0, %31, %bb.3
%32:sreg_32 = S_MOV_B32 0
S_CMP_EQ_U32 %28, killed %32, implicit-def $scc
%33:sreg_64 = S_MOV_B64 -1
%34:sreg_64 = IMPLICIT_DEF
%35:vreg_1 = COPY %34
S_CBRANCH_SCC1 %bb.3, implicit $scc
S_BRANCH %bb.2
bb.2:
%36:vgpr_32 = FLAT_LOAD_DWORD %24, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
%37:sreg_32 = S_MOV_B32 6
%38:vgpr_32 = COPY %37
%39:vgpr_32 = V_LSHR_B32_e32 killed %36, killed %38, implicit $exec
%40:sreg_32 = IMPLICIT_DEF
%41:vgpr_32 = V_AND_B32_e64 1, %39, implicit $exec
%42:sreg_64 = V_CMP_EQ_U32_e64 killed %41, 1, implicit $exec
%43:sreg_64 = COPY %30
%44:sreg_64 = S_AND_B64 %43, killed %42, implicit-def dead $scc
%45:sreg_64 = COPY %30
%46:sreg_64 = S_OR_B64 killed %44, %45, implicit-def dead $scc
%47:sreg_64 = S_MOV_B64 0
%48:vreg_1 = COPY %46
bb.3:
successors: %bb.4(0x00000000), %bb.1(0x80000000)
%31:vreg_1 = PHI %35, %bb.1, %48, %bb.2
%49:sreg_64_xexec = PHI %33, %bb.1, %47, %bb.2
%29:sreg_32 = S_MOV_B32 -1
%50:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %49, implicit $exec
%51:sreg_32 = S_MOV_B32 1
%52:sreg_32 = IMPLICIT_DEF
V_CMP_NE_U32_e32 killed %51, %50, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.4
bb.4:
...