diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 66c66425691..ba14b7be858 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -186,6 +186,9 @@ private:
   LegalizeResult fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                            LLT NarrowTy);
 
+  LegalizeResult fewerElementsVectorPhi(MachineInstr &MI,
+                                        unsigned TypeIdx, LLT NarrowTy);
+
   LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT MoreTy);
 
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8491763d991..54dd644bcd6 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -34,8 +34,10 @@ using namespace LegalizeActions;
 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
 /// with any leftover piece as type \p LeftoverTy
 ///
-/// Returns -1 if the breakdown is not satisfiable.
-static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+/// Returns -1 in the first element of the pair if the breakdown is not
+/// satisfiable.
+static std::pair<int, int>
+getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
   assert(!LeftoverTy.isValid() && "this is an out argument");
 
   unsigned Size = OrigTy.getSizeInBits();
@@ -45,18 +47,19 @@ static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
   assert(Size > NarrowSize);
 
   if (LeftoverSize == 0)
-    return NumParts;
+    return {NumParts, 0};
 
   if (NarrowTy.isVector()) {
     unsigned EltSize = OrigTy.getScalarSizeInBits();
     if (LeftoverSize % EltSize != 0)
-      return -1;
+      return {-1, -1};
     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
   } else {
     LeftoverTy = LLT::scalar(LeftoverSize);
   }
 
-  return NumParts;
+  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
+  return std::make_pair(NumParts, NumLeftover);
 }
 
 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
@@ -1759,10 +1762,12 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
   LLT DstTy = MRI.getType(DstReg);
   LLT LeftoverTy0;
 
+  int NumParts, NumLeftover;
   // All of the operands need to have the same number of elements, so if we can
   // determine a type breakdown for the result type, we can for all of the
   // source types.
-  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
+  std::tie(NumParts, NumLeftover)
+    = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
   if (NumParts < 0)
     return UnableToLegalize;
 
@@ -2017,6 +2022,73 @@ LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT NarrowTy) {
+  const unsigned DstReg = MI.getOperand(0).getReg();
+  LLT PhiTy = MRI.getType(DstReg);
+  LLT LeftoverTy;
+
+  // All of the operands need to have the same number of elements, so if we can
+  // determine a type breakdown for the result type, we can for all of the
+  // source types.
+  int NumParts, NumLeftover;
+  std::tie(NumParts, NumLeftover)
+    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
+  if (NumParts < 0)
+    return UnableToLegalize;
+
+  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
+  SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+  const int TotalNumParts = NumParts + NumLeftover;
+
+  // Insert the new phis in the result block first.
+  for (int I = 0; I != TotalNumParts; ++I) {
+    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
+    unsigned PartDstReg = MRI.createGenericVirtualRegister(Ty);
+    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
+                       .addDef(PartDstReg));
+    if (I < NumParts)
+      DstRegs.push_back(PartDstReg);
+    else
+      LeftoverDstRegs.push_back(PartDstReg);
+  }
+
+  MachineBasicBlock *MBB = MI.getParent();
+  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
+  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
+
+  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
+
+  // Insert code to extract the incoming values in each predecessor block.
+  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+    PartRegs.clear();
+    LeftoverRegs.clear();
+
+    unsigned SrcReg = MI.getOperand(I).getReg();
+    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+
+    LLT Unused;
+    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
+                      LeftoverRegs))
+      return UnableToLegalize;
+
+    // Add the newly created operand splits to the existing instructions. The
+    // odd-sized pieces are ordered after the requested NarrowTy sized
+    // pieces.
+    for (int J = 0; J != TotalNumParts; ++J) {
+      MachineInstrBuilder MIB = NewInsts[J];
+      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
+      MIB.addMBB(&OpMBB);
+    }
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy) {
@@ -2038,14 +2110,17 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
   LLT ValTy = MRI.getType(ValReg);
 
   int NumParts = -1;
+  int NumLeftover = -1;
   LLT LeftoverTy;
   SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
   if (IsLoad) {
-    NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
   } else {
     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
-                     NarrowLeftoverRegs))
+                     NarrowLeftoverRegs)) {
       NumParts = NarrowRegs.size();
+      NumLeftover = NarrowLeftoverRegs.size();
+    }
   }
 
   if (NumParts == -1)
@@ -2169,6 +2244,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
   case G_SELECT:
     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+  case G_PHI:
+    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index d83daba16ae..61d38ba0e6f 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -159,6 +159,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     .legalFor(AddrSpaces32)
     .clampScalar(0, S32, S256)
     .widenScalarToNextPow2(0, 32)
+    .clampMaxNumElements(0, S32, 16)
     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
     .legalIf(isPointer(0));
 
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 30bd542da8a..6f4b5667054 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
 
 ---
 name: test_phi_s32
@@ -391,6 +391,208 @@ body: |
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5
     S_SETPC_B64 undef $sgpr30_sgpr31
 
+...
+---
+name: test_phi_v8s32
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: test_phi_v8s32
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
+  ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+  ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+  ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]]
+  ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]]
+  ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]]
+  ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]]
+  ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]]
+  ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]]
+  ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]]
+  ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]]
+  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32)
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.2:
+  ; CHECK: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1
+  ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>)
+  ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
+
+    %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(s32) = COPY $vgpr8
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %1, %2
+    G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %4:_(<8 x s32>) = G_ADD %0, %0
+    G_BR %bb.2
+
+  bb.2:
+    %5:_(<8 x s32>) = G_PHI %0, %bb.0, %4, %bb.1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5
+    S_SETPC_B64 undef $sgpr30_sgpr31
+
+...
+---
+name: test_phi_v16s32
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: test_phi_v16s32
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+  ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+  ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+  ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]]
+  ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]]
+  ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]]
+  ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]]
+  ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]]
+  ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]]
+  ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]]
+  ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]]
+  ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]]
+  ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]]
+  ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]]
+  ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]]
+  ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]]
+  ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]]
+  ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]]
+  ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]]
+  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32)
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.2:
+  ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
+  ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    %0:_(<16 x s32>) = G_IMPLICIT_DEF
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %1, %2
+    G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %4:_(<16 x s32>) = G_ADD %0, %0
+    G_BR %bb.2
+
+  bb.2:
+    %5:_(<16 x s32>) = G_PHI %0, %bb.0, %4, %bb.1
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5
+
+...
+
+---
+name: test_phi_v32s32
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: test_phi_v32s32
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+  ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+  ; CHECK: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+  ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF1]](<16 x s32>)
+  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s32>)
+  ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s32>)
+  ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]]
+  ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]]
+  ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]]
+  ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]]
+  ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]]
+  ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]]
+  ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]]
+  ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]]
+  ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]]
+  ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]]
+  ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]]
+  ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]]
+  ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]]
+  ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]]
+  ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]]
+  ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]]
+  ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]]
+  ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]]
+  ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]]
+  ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]]
+  ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]]
+  ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]]
+  ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]]
+  ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]]
+  ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]]
+  ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]]
+  ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]]
+  ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]]
+  ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]]
+  ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]]
+  ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]]
+  ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]]
+  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
+  ; CHECK: [[UV64:%[0-9]+]]:_(<16 x s32>), [[UV65:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.2:
+  ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[UV64]](<16 x s32>), %bb.1
+  ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[UV65]](<16 x s32>), %bb.1
+  ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>)
+  ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS1]](<32 x s32>)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    %0:_(<32 x s32>) = G_IMPLICIT_DEF
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %1, %2
+    G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %4:_(<32 x s32>) = G_ADD %0, %0
+    G_BR %bb.2
+
+  bb.2:
+    %5:_(<32 x s32>) = G_PHI %0, %bb.0, %4, %bb.1
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %5
+
 ...
 ---
 name: test_phi_s64
diff --git a/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 2e4cd0640a2..67b897fb6a3 100644
--- a/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -635,4 +635,98 @@ TEST_F(GISelMITest, MoreElementsAnd) {
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
+
+TEST_F(GISelMITest, FewerElementsPhi) {
+  if (!TM)
+    return;
+
+  LLT s1 = LLT::scalar(1);
+  LLT s32 = LLT::scalar(32);
+  LLT s64 = LLT::scalar(64);
+  LLT v2s32 = LLT::vector(2, 32);
+  LLT v5s32 = LLT::vector(5, 32);
+
+  LegalizerInfo LI;
+  LI.getActionDefinitionsBuilder(TargetOpcode::G_PHI)
+    .legalFor({v2s32})
+    .clampMinNumElements(0, s32, 2);
+  LI.computeTables();
+
+  LLT PhiTy = v5s32;
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, LI, Observer, B);
+  B.setMBB(*EntryMBB);
+
+  MachineBasicBlock *MidMBB = MF->CreateMachineBasicBlock();
+  MachineBasicBlock *EndMBB = MF->CreateMachineBasicBlock();
+  MF->insert(MF->end(), MidMBB);
+  MF->insert(MF->end(), EndMBB);
+
+  EntryMBB->addSuccessor(MidMBB);
+  EntryMBB->addSuccessor(EndMBB);
+  MidMBB->addSuccessor(EndMBB);
+
+  auto InitVal = B.buildUndef(PhiTy);
+  auto InitOtherVal = B.buildConstant(s64, 999);
+
+  auto ICmp = B.buildICmp(CmpInst::ICMP_EQ, s1, Copies[0], Copies[1]);
+  B.buildBrCond(ICmp.getReg(0), *MidMBB);
+  B.buildBr(*EndMBB);
+
+  B.setMBB(*MidMBB);
+  auto MidVal = B.buildUndef(PhiTy);
+  auto MidOtherVal = B.buildConstant(s64, 345);
+  B.buildBr(*EndMBB);
+
+  B.setMBB(*EndMBB);
+  auto Phi = B.buildInstr(TargetOpcode::G_PHI)
+    .addDef(MRI->createGenericVirtualRegister(PhiTy))
+    .addUse(InitVal.getReg(0))
+    .addMBB(EntryMBB)
+    .addUse(MidVal.getReg(0))
+    .addMBB(MidMBB);
+
+  // Insert another irrelevant phi to make sure the rebuild is inserted after
+  // it.
+  B.buildInstr(TargetOpcode::G_PHI)
+    .addDef(MRI->createGenericVirtualRegister(s64))
+    .addUse(InitOtherVal.getReg(0))
+    .addMBB(EntryMBB)
+    .addUse(MidOtherVal.getReg(0))
+    .addMBB(MidMBB);
+
+  // Add some use instruction after the phis.
+  B.buildAnd(PhiTy, Phi.getReg(0), Phi.getReg(0));
+
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.fewerElementsVector(*Phi, 0, v2s32));
+
+  auto CheckStr = R"(
+  CHECK: [[INITVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+  CHECK: [[EXTRACT0:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 0
+  CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 64
+  CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 128
+  CHECK: G_BRCOND
+
+  CHECK: [[MIDVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+  CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 0
+  CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 64
+  CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 128
+  CHECK: G_BR
+
+  CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT0]]:_(<2 x s32>), %bb.0, [[EXTRACT3]]:_(<2 x s32>), %bb.1
+  CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT1]]:_(<2 x s32>), %bb.0, [[EXTRACT4]]:_(<2 x s32>), %bb.1
+  CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1
+
+  CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI
+  CHECK: [[REBUILD_VAL_IMPDEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+  CHECK: [[INSERT0:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[REBUILD_VAL_IMPDEF]]:_, [[PHI0]]:_(<2 x s32>), 0
+  CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT0]]:_, [[PHI1]]:_(<2 x s32>), 64
+  CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]]:_, [[PHI2]]:_(s32), 128
+  CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[INSERT2]]:_, [[INSERT2]]:_
+  )";
+
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
 } // namespace
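Reviewer note, not part of the patch: the new getNarrowTypeBreakDown contract returns the pair (NumParts, NumLeftover), and fewerElementsVectorPhi creates NumParts + NumLeftover new G_PHIs. The following is a minimal standalone sketch of that breakdown arithmetic in plain C++, so the counts the tests expect can be checked by hand. breakDown and its parameter names are invented for this illustration and are not LLVM API; the numbers mirror the FewerElementsPhi unit test, where <5 x s32> (160 bits) narrowed to <2 x s32> (64 bits) yields two full parts plus one s32 leftover, i.e. the two <2 x s32> G_PHIs and the single s32 G_PHI in the CHECK lines above.

#include <cassert>
#include <tuple>
#include <utility>

// Sketch of the pair getNarrowTypeBreakDown computes, on raw bit widths.
// Returns {-1, -1} when the leftover bits are not a whole number of
// EltBits-sized elements.
static std::pair<int, int> breakDown(unsigned OrigBits, unsigned NarrowBits,
                                     unsigned EltBits) {
  int NumParts = OrigBits / NarrowBits;
  unsigned LeftoverBits = OrigBits - NumParts * NarrowBits;
  if (LeftoverBits == 0)
    return {NumParts, 0};
  if (LeftoverBits % EltBits != 0)
    return {-1, -1};
  // The leftover type is built to cover all remaining bits, so exactly one
  // leftover piece results in this sketch.
  return {NumParts, 1};
}

int main() {
  int NumParts, NumLeftover;
  // <5 x s32> split by <2 x s32>: two narrow parts and one s32 leftover.
  std::tie(NumParts, NumLeftover) = breakDown(160, 64, 32);
  assert(NumParts == 2 && NumLeftover == 1);
  // <32 x s32> split by <16 x s32>: two parts and no leftover, matching the
  // two <16 x s32> G_PHIs rejoined by G_CONCAT_VECTORS in test_phi_v32s32.
  std::tie(NumParts, NumLeftover) = breakDown(1024, 512, 32);
  assert(NumParts == 2 && NumLeftover == 0);
  return 0;
}

The uneven <5 x s32> case is only reachable through the unit test; on the AMDGPU side, clampMaxNumElements(0, S32, 16) plus the existing moreElementsIf rule steer the MIR tests to the even breakdowns shown above.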