diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ca055f6c957..681ab3a2750 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -516,6 +516,10 @@ public:
     return FMA;
   }
 
+  bool hasSwap() const {
+    return GFX9Insts;
+  }
+
   TrapHandlerAbi getTrapHandlerAbi() const {
     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
   }
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index d37ad077dd6..6e58c138a76 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -212,6 +212,180 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
   }
 }
 
+// This is the same as MachineInstr::readsRegister/modifiesRegister except
+// it takes subregs into account.
+// Physical registers are compared with regsOverlap(); a virtual register
+// matches only when the lane masks of the two subregister indices intersect.
+static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+                          unsigned Reg, unsigned SubReg,
+                          const SIRegisterInfo &TRI) {
+  for (const MachineOperand &MO : R) {
+    if (!MO.isReg())
+      continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+        TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+      if (TRI.regsOverlap(Reg, MO.getReg()))
+        return true;
+    } else if (MO.getReg() == Reg &&
+               TargetRegisterInfo::isVirtualRegister(Reg)) {
+      LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
+                            TRI.getSubRegIndexLaneMask(MO.getSubReg());
+      if (Overlap.any())
+        return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if any operand in MI->uses() accesses Reg:SubReg.
+static bool instReadsReg(const MachineInstr *MI,
+                         unsigned Reg, unsigned SubReg,
+                         const SIRegisterInfo &TRI) {
+  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+}
+
+// Returns true if any operand in MI->defs() accesses Reg:SubReg.
+static bool instModifiesReg(const MachineInstr *MI,
+                            unsigned Reg, unsigned SubReg,
+                            const SIRegisterInfo &TRI) {
+  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+}
+
+// Returns the register/subregister pair naming the I-th 32-bit channel of
+// Reg:Sub. A register that is already 32 bits wide is returned unchanged.
+static TargetInstrInfo::RegSubRegPair
+getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
+  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+    } else {
+      LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
+      Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+    }
+  }
+  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
+}
+
+// Match:
+// mov t, x
+// mov x, y
+// mov y, t
+//
+// =>
+//
+// mov t, x (t is potentially dead and move eliminated)
+// v_swap_b32 x, y
+//
+// Returns the next valid instruction if a v_swap_b32 was created, else null.
+//
+// This should not be done too early, so as not to prevent possible folding
+// which may remove the matched moves. It should preferably be done before RA
+// to release saved registers, and possibly also after RA, which can insert
+// copies too.
+//
+// This is really just a generic peephole that is not a canonical shrinking,
+// although requirements match the pass placement and it reduces code size too.
+static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
+                               const SIInstrInfo *TII) {
+  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+         MovT.getOpcode() == AMDGPU::COPY);
+
+  unsigned T = MovT.getOperand(0).getReg();
+  unsigned Tsub = MovT.getOperand(0).getSubReg();
+  MachineOperand &Xop = MovT.getOperand(1);
+
+  if (!Xop.isReg())
+    return nullptr;
+  unsigned X = Xop.getReg();
+  unsigned Xsub = Xop.getSubReg();
+
+  // Number of V_SWAP_B32 instructions to emit (operand size divided by 4).
+  unsigned Size = TII->getOpSize(MovT, 0) / 4;
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  if (!TRI.isVGPR(MRI, X))
+    return nullptr;
+
+  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
+    if (YTop.getSubReg() != Tsub)
+      continue;
+
+    MachineInstr &MovY = *YTop.getParent();
+    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+         MovY.getOpcode() != AMDGPU::COPY) ||
+        MovY.getOperand(1).getSubReg() != Tsub)
+      continue;
+
+    unsigned Y = MovY.getOperand(0).getReg();
+    unsigned Ysub = MovY.getOperand(0).getSubReg();
+
+    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+      continue;
+
+    // Look between MovT and MovY for exactly one move into X that reads Y.
+    // Any read of X, write to Y or T, other write to X, or other reader of
+    // Y in that range rejects the candidate.
+    MachineInstr *MovX = nullptr;
+    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
+    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
+      if (instReadsReg(&*I, X, Xsub, TRI) ||
+          instModifiesReg(&*I, Y, Ysub, TRI) ||
+          instModifiesReg(&*I, T, Tsub, TRI) ||
+          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+        MovX = nullptr;
+        break;
+      }
+      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
+        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+          MovX = nullptr;
+          break;
+        }
+        continue;
+      }
+      if (MovX ||
+          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+           I->getOpcode() != AMDGPU::COPY) ||
+          I->getOperand(0).getReg() != X ||
+          I->getOperand(0).getSubReg() != Xsub) {
+        MovX = nullptr;
+        break;
+      }
+      MovX = &*I;
+    }
+
+    // No usable move into X, or MovY was not reached before the block end.
+    if (!MovX || I == E)
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+
+    for (unsigned I = 0; I < Size; ++I) {
+      TargetInstrInfo::RegSubRegPair X1, Y1;
+      X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
+      Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
+                TII->get(AMDGPU::V_SWAP_B32))
+        .addDef(X1.Reg, 0, X1.SubReg)
+        .addDef(Y1.Reg, 0, Y1.SubReg)
+        .addReg(Y1.Reg, 0, Y1.SubReg)
+        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+    }
+    MovX->eraseFromParent();
+    MovY.eraseFromParent();
+    MachineInstr *Next = &*std::next(MovT.getIterator());
+    if (MRI.use_nodbg_empty(T))
+      MovT.eraseFromParent();
+    else
+      Xop.setIsKill(false);
+
+    return Next;
+  }
+
+  return nullptr;
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -252,6 +426,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         }
       }
 
+      if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+                           MI.getOpcode() == AMDGPU::COPY)) {
+        if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+          Next = NextMI->getIterator();
+          continue;
+        }
+      }
+
       // Combine adjacent s_nops to use the immediate operand encoding how long
       // to wait.
       //
diff --git a/test/CodeGen/AMDGPU/v_swap_b32.mir b/test/CodeGen/AMDGPU/v_swap_b32.mir
new file mode 100644
index 00000000000..f0ce14bb9dd
--- /dev/null
+++ b/test/CodeGen/AMDGPU/v_swap_b32.mir
@@ -0,0 +1,564 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: swap_phys_condensed
+# GCN: bb.0:
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return
+---
+name: swap_phys_condensed
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_phys_sparse
+# GCN: bb.0:
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return
+---
+name: swap_phys_sparse
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+ +# GCN-LABEL: name: swap_phys_liveout +# GCN: bb.0: +# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec +# GCN-NEXT: S_SETPC_B64_return +--- +name: swap_phys_liveout +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2, implicit $vgpr1 +... + +# GCN-LABEL: name: swap_phys_b64 +# GCN: bb.0: +# GCN-NEXT: $vgpr0, $vgpr2 = V_SWAP_B32 $vgpr2, $vgpr0, implicit $exec +# GCN-NEXT: $vgpr1, $vgpr3 = V_SWAP_B32 $vgpr3, $vgpr1, implicit $exec +--- +name: swap_phys_b64 +body: | + bb.0: + $vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1 + $vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3 + $vgpr2_vgpr3 = COPY killed $vgpr4_vgpr5 +... + +# GCN-LABEL: name: swap_phys_overlap_x +# GCN: bb.0: +# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +--- +name: swap_phys_overlap_x +body: | + bb.0: + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +... 
+ +# GCN-LABEL: name: swap_phys_clobber_y +# GCN: bb.0: +# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +--- +name: swap_phys_clobber_y +body: | + bb.0: + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_copy_condense +# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec +--- +name: swap_virt_copy_condense +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %1 = COPY %2 +... + +# GCN-LABEL: name: swap_virt_copy_sparse +# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec +--- +name: swap_virt_copy_sparse +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + S_NOP 0 + %0 = COPY %1 + S_NOP 0 + %1 = COPY %2 +... + +# GCN-LABEL: name: swap_virt_copy_subreg +# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +--- +name: swap_virt_copy_subreg +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0 + %2.sub1 = COPY %0.sub1 + %0.sub0 = COPY %1.sub0 + %0.sub1 = COPY %1.sub1 + %1.sub0 = COPY %2.sub0 +... 
+ +# GCN-LABEL: name: swap_virt_mov +# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec +--- +name: swap_virt_mov +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = V_MOV_B32_e32 %0, implicit $exec + %0 = V_MOV_B32_e32 %1, implicit $exec + %1 = V_MOV_B32_e32 %2, implicit $exec +... + +# GCN-LABEL: name: swap_virt_read_x +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %3:vgpr_32 = COPY %0 +# GCN-NEXT: %0:vgpr_32 = COPY %1 +# GCN-NEXT: %1:vgpr_32 = COPY %2 +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_read_x +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %3 = COPY %0 + %0 = COPY %1 + %1 = COPY %2 + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_read_t_twice +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %3:vgpr_32 = COPY %2 +# GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_read_t_twice +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %3 = COPY %2 + %0 = COPY %1 + %1 = COPY %2 + S_ENDPGM +... 
+ +# GCN-LABEL: name: swap_virt_clobber_y +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %0:vgpr_32 = COPY %1 +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = COPY %2 +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_clobber_y +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %1 = IMPLICIT_DEF + %1 = COPY %2 + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_clobber_x1 +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %0:vgpr_32 = COPY %1 +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = COPY %2 +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_clobber_x1 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %0 = IMPLICIT_DEF + %1 = COPY %2 + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_clobber_x2 +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %0:vgpr_32 = COPY %1 +# GCN-NEXT: %1:vgpr_32 = COPY %2 +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_clobber_x2 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = IMPLICIT_DEF + %0 = COPY %1 + %1 = COPY %2 + S_ENDPGM +... 
+ +# GCN-LABEL: name: swap_virt_clobber_t +# GCN: bb.0: +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:vgpr_32 = COPY %0 +# GCN-NEXT: %0:vgpr_32 = COPY %1 +# GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:vgpr_32 = COPY %2 +# GCN-NEXT: S_ENDPGM + +--- +name: swap_virt_clobber_t +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %2 = IMPLICIT_DEF + %1 = COPY %2 + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_full +# GCN: bb.0: +# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0 +# GCN-NEXT: %3:vreg_64 = COPY %0 +# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0 +# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0 +--- +name: swap_virt_copy_subreg_overlap_x_full +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } + - { id: 3, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0 + %3 = COPY %0 + %0.sub0 = COPY %1.sub0 + %1.sub0 = COPY %2.sub0 +... + +# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_part +# GCN: bb.0: +# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0 +# GCN-NEXT: %3:vreg_64 = COPY %0.sub0_sub1 +# GCN-NEXT: %0.sub0:vreg_128 = COPY %1.sub0 +# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0 +--- +name: swap_virt_copy_subreg_overlap_x_part +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } + - { id: 3, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0 + %3 = COPY %0.sub0_sub1 + %0.sub0 = COPY %1.sub0 + %1.sub0 = COPY %2.sub0 +... 
+ +# GCN-LABEL: name: swap_virt_copy_subreg_wide_y +# GCN: bb.0: +# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0 +# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0 +# GCN-NEXT: %1:vreg_64 = COPY %2 +--- +name: swap_virt_copy_subreg_wide_y +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0 + %0.sub0 = COPY %1.sub0 + %1 = COPY %2 +... + +# GCN-LABEL: name: swap_virt_b64 +# GCN: bb.0: +# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +# GCN-NEXT: %0.sub1:vreg_64, %1.sub1:vreg_64 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec +--- +name: swap_virt_b64 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %1 = COPY %2 +... + +# GCN-LABEL: name: swap_virt_b128 +# GCN: bb.0: +# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec +# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec +# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec +--- +name: swap_virt_b128 +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: vreg_128 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %1 = COPY %2 +... 
+ +# GCN-LABEL: name: swap_virt_b128_sub0_1 +# GCN: bb.0: +# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec +# GCN-NEXT: S_ENDPGM +--- +name: swap_virt_b128_sub0_1 +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: vreg_128 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0_sub1 = COPY %0.sub0_sub1 + %0.sub0_sub1 = COPY %1.sub0_sub1 + %1.sub0_sub1 = COPY %2.sub0_sub1 + S_ENDPGM +... + +# GCN-LABEL: name: swap_virt_b128_sub2_3 +# GCN: bb.0: +# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF +# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec +# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec +# GCN-NEXT: S_ENDPGM +--- +name: swap_virt_b128_sub2_3 +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: vreg_128 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub2_sub3 = COPY %0.sub2_sub3 + %0.sub2_sub3 = COPY %1.sub2_sub3 + %1.sub2_sub3 = COPY %2.sub2_sub3 + S_ENDPGM +... + + +# GCN-LABEL: name: swap_virt_s_to_s +# GCN: bb.0: +# GCN-NEXT: %0:sgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %1:sgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %2:sgpr_32 = COPY %0 +# GCN-NEXT: %0:sgpr_32 = COPY %1 +# GCN-NEXT: %1:sgpr_32 = COPY %2 +--- +name: swap_virt_s_to_s +registers: + - { id: 0, class: sgpr_32 } + - { id: 1, class: sgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1 + %1 = COPY %2 +... 
+ +# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super +# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +--- +name: swap_virt_copy_subreg_impdef_super +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0, implicit-def %2, implicit $exec + %2.sub1 = COPY %0.sub1 + %0.sub0 = COPY %1.sub0 + %0.sub1 = COPY %1.sub1 + %1.sub0 = COPY %2.sub0 +... + +# GCN-LABEL: name: swap_virt_copy_subreg_impuse_x +# GCN: bb.0: +# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0 +# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1 +# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0, implicit %0 +# GCN-NEXT: %0.sub1:vreg_64 = COPY %1.sub1 +# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0 +# GCN-NEXT: S_ENDPGM +--- +name: swap_virt_copy_subreg_impuse_x +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2.sub0 = COPY %0.sub0 + %2.sub1 = COPY %0.sub1 + %0.sub0 = COPY %1.sub0, implicit %0 + %0.sub1 = COPY %1.sub1 + %1.sub0 = COPY %2.sub0 + S_ENDPGM +...