diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 056e1049461..28ebbd9101c 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -555,39 +555,97 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { return false; } +// FIXME: TableGen should generate something to make this manageable for all +// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. +static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI, + unsigned Size, unsigned Offset) { + switch (Size) { + case 32: + return TRI.getSubRegFromChannel(Offset / 32); + case 64: { + switch (Offset) { + case 0: + return AMDGPU::sub0_sub1; + case 32: + return AMDGPU::sub1_sub2; + case 64: + return AMDGPU::sub2_sub3; + case 96: + return AMDGPU::sub4_sub5; + case 128: + return AMDGPU::sub5_sub6; + case 160: + return AMDGPU::sub7_sub8; + // FIXME: Missing cases up to 1024 bits + default: + return AMDGPU::NoSubRegister; + } + } + case 96: { + switch (Offset) { + case 0: + return AMDGPU::sub0_sub1_sub2; + case 32: + return AMDGPU::sub1_sub2_sub3; + case 64: + return AMDGPU::sub2_sub3_sub4; + } + } + default: + return AMDGPU::NoSubRegister; + } +} + bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); + + Register DstReg = I.getOperand(0).getReg(); Register Src0Reg = I.getOperand(1).getReg(); Register Src1Reg = I.getOperand(2).getReg(); LLT Src1Ty = MRI->getType(Src1Reg); - if (Src1Ty.getSizeInBits() != 32) - return false; + + unsigned DstSize = MRI->getType(DstReg).getSizeInBits(); + unsigned InsSize = Src1Ty.getSizeInBits(); int64_t Offset = I.getOperand(3).getImm(); if (Offset % 32 != 0) return false; - unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); + unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset); + if (SubReg == AMDGPU::NoSubRegister) + return false; + + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + if (!DstRC) + return false; + + const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI); + const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI); + const TargetRegisterClass *Src0RC = + TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI); + const TargetRegisterClass *Src1RC = + TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI); + + // Deal with weird cases where the class only partially supports the subreg + // index. + Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg); + if (!Src0RC) + return false; + + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) || + !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI)) + return false; + const DebugLoc &DL = I.getDebugLoc(); + BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg) + .addReg(Src0Reg) + .addReg(Src1Reg) + .addImm(SubReg); - MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) - .addDef(I.getOperand(0).getReg()) - .addReg(Src0Reg) - .addReg(Src1Reg) - .addImm(SubReg); - - for (const MachineOperand &MO : Ins->operands()) { - if (!MO.isReg()) - continue; - if (Register::isPhysicalRegister(MO.getReg())) - continue; - - const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, *MRI); - if (!RC) - continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); - } I.eraseFromParent(); return true; } diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 93e35ead4d4..3cd1b463b57 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -1,32 +1,35 @@ -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + --- -name: insert512 +name: insert_s512_s32 legalized: true regBankSelected: true -# CHECK-LABEL: insert512 -# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF -# CHECK: [[VAL:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF -# CHECK: [[BASE0:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE]], [[VAL]], %subreg.sub0 -# CHECK: [[BASE1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE0]], [[VAL]], %subreg.sub1 -# CHECK: [[BASE2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE1]], [[VAL]], %subreg.sub2 -# CHECK: [[BASE3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE2]], [[VAL]], %subreg.sub3 -# CHECK: [[BASE4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE3]], [[VAL]], %subreg.sub4 -# CHECK: [[BASE5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE4]], [[VAL]], %subreg.sub5 -# CHECK: [[BASE6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE5]], [[VAL]], %subreg.sub6 -# CHECK: [[BASE7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE6]], [[VAL]], %subreg.sub7 -# CHECK: [[BASE8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE7]], [[VAL]], %subreg.sub8 -# CHECK: [[BASE9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE8]], [[VAL]], %subreg.sub9 -# CHECK: [[BASE10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE9]], [[VAL]], %subreg.sub10 -# CHECK: [[BASE11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE10]], [[VAL]], %subreg.sub11 -# CHECK: [[BASE12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE11]], [[VAL]], %subreg.sub12 -# CHECK: [[BASE13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE12]], [[VAL]], %subreg.sub13 -# CHECK: [[BASE14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE13]], [[VAL]], %subreg.sub14 -# CHECK: [[BASE15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE14]], [[VAL]], %subreg.sub15 - body: | bb.0: + ; CHECK-LABEL: name: insert_s512_s32 + ; CHECK: [[DEF:%[0-9]+]]:sreg_512 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[DEF]], [[DEF1]], %subreg.sub0 + ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG]], [[DEF1]], %subreg.sub1 + ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG1]], [[DEF1]], %subreg.sub2 + ; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG2]], [[DEF1]], %subreg.sub3 + ; CHECK: [[INSERT_SUBREG4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG3]], [[DEF1]], %subreg.sub4 + ; CHECK: [[INSERT_SUBREG5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG4]], [[DEF1]], %subreg.sub5 + ; CHECK: [[INSERT_SUBREG6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG5]], [[DEF1]], %subreg.sub6 + ; CHECK: [[INSERT_SUBREG7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG6]], [[DEF1]], %subreg.sub7 + ; CHECK: [[INSERT_SUBREG8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG7]], [[DEF1]], %subreg.sub8 + ; CHECK: [[INSERT_SUBREG9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG8]], [[DEF1]], %subreg.sub9 + ; CHECK: [[INSERT_SUBREG10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG9]], [[DEF1]], %subreg.sub10 + ; CHECK: [[INSERT_SUBREG11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG10]], [[DEF1]], %subreg.sub11 + ; CHECK: [[INSERT_SUBREG12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG11]], [[DEF1]], %subreg.sub12 + ; CHECK: [[INSERT_SUBREG13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13 + ; CHECK: [[INSERT_SUBREG14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14 + ; CHECK: [[INSERT_SUBREG15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]] + ; CHECK: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0 @@ -47,3 +50,403 @@ body: | %17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512) SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + +--- + +name: insert_v_s64_v_s32_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s64_v_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: insert_v_s64_v_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s64_s_s32_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: insert_s_s64_s_s32_0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s64_s_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: insert_s_s64_s_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s64_v_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; CHECK-LABEL: name: insert_s_s64_v_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s64_s_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0 + ; CHECK-LABEL: name: insert_v_s64_s_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s96_v_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 + ; CHECK-LABEL: name: insert_v_s96_v_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(s64) = COPY $vgpr3_vgpr4 + %2:vgpr(s96) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s96_v_s64_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 + ; CHECK-LABEL: name: insert_v_s96_v_s64_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(s64) = COPY $vgpr3_vgpr4 + %2:vgpr(s96) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s96_s_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s96_s_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s96) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s96_s_s64_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s96_s_s64_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s96) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s128_s_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s128_s_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s128) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +# --- + +# name: insert_s_s128_s_s64_32 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 +# %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s128) = G_INSERT %0, %1, 32 +# S_ENDPGM 0, implicit %2 +# ... + +--- + +name: insert_s_s128_s_s64_64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s128_s_s64_64 + ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s128) = G_INSERT %0, %1, 64 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s256_s_s64_96 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK-LABEL: name: insert_s_s256_s_s64_96 + ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s64) = COPY $sgpr8_sgpr9 + %2:sgpr(s256) = G_INSERT %0, %1, 96 + S_ENDPGM 0, implicit %2 +... + +# --- + +# name: insert_s_s256_s_s64_128 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 +# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s256) = G_INSERT %0, %1, 128 +# S_ENDPGM 0, implicit %2 +# ... + +# --- + +# name: insert_s_s256_s_s64_160 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 +# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s256) = G_INSERT %0, %1, 160 +# S_ENDPGM 0, implicit %2 +# ... + +--- + +name: insert_s_s128_s_s96_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s128_s_s96_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s128) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s128_s_s96_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s128_s_s96_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s128) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s160_s_s96_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s160_s_s96_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s160_s_s96_64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_64 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 64 + S_ENDPGM 0, implicit %2 +...