From d15b15865e52440404f8b37a54a65777f897db30 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 25 Jan 2020 09:31:09 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG We don't need to expand to full shifts for the > 32-bit case. This just switches to a sext_inreg of the high half. --- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 67 +++++-------------- .../GlobalISel/regbankselect-sext-inreg.mir | 57 ++++++++++++++-- 2 files changed, 68 insertions(+), 56 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5dabd8b83a8..4943f99afa7 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1799,53 +1799,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case AMDGPU::G_SEXT_INREG: { - const RegisterBank *SrcBank = - OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank; - - // We can directly handle all 64-bit cases with s_bfe_i64. - if (SrcBank == &AMDGPU::SGPRRegBank) - break; + SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1)); + if (SrcRegs.empty()) + break; // Nothing to repair const LLT S32 = LLT::scalar(32); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(DstReg); - if (Ty == S32) - break; - MachineIRBuilder B(MI); ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank); GISelObserverWrapper Observer(&O); B.setChangeObserver(Observer); + // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs + // we would need to further expand, and doesn't let us directly set the + // result registers. + SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0)); + int Amt = MI.getOperand(2).getImm(); if (Amt <= 32) { - // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs - // we would need to further expand, and doesn't let us directly set the - // result registers. 
- SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0)); - SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1)); - - if (SrcRegs.empty()) - split64BitValueForMapping(B, SrcRegs, S32, SrcReg); - // Extend in the low bits and propagate the sign bit to the high half. - auto ShiftAmt = B.buildConstant(S32, 31); if (Amt == 32) { + // The low bits are unchanged. B.buildCopy(DstRegs[0], SrcRegs[0]); - B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt); } else { + // Extend in the low bits and propagate the sign bit to the high half. B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt); - B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt); } + + B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31)); } else { - assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1))); - const LLT S64 = LLT::scalar(64); - // This straddles two registers. Expand with 64-bit shifts. - auto ShiftAmt = B.buildConstant(S32, 64 - Amt); - auto Shl = B.buildShl(S64, SrcReg, ShiftAmt); - B.buildAShr(DstReg, Shl, ShiftAmt); + // The low bits are unchanged, and extend in the high bits. 
+ B.buildCopy(DstRegs[0], SrcRegs[0]); + B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32); } + Register DstReg = MI.getOperand(0).getReg(); MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank); MI.eraseFromParent(); return; @@ -2965,7 +2951,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_ZEXT: case AMDGPU::G_SEXT: - case AMDGPU::G_ANYEXT: { + case AMDGPU::G_ANYEXT: + case AMDGPU::G_SEXT_INREG: { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); unsigned DstSize = getSizeInBits(Dst, MRI, *TRI); @@ -2996,24 +2983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case AMDGPU::G_SEXT_INREG: { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - Register Amt = MI.getOperand(2).getImm(); - unsigned Size = getSizeInBits(Dst, MRI, *TRI); - unsigned BankID = getRegBank(Src, MRI, *TRI)->getID(); - - if (Amt <= 32) { - OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size); - } else { - // If we need to expand a 64 bit for the VALU, this will straddle two - // registers. Just expand this with 64-bit shifts. 
- OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size); - } - - OpdsMapping[1] = OpdsMapping[0]; - break; - } case AMDGPU::G_FCMP: { unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir index 9d812fde685..2e72381795a 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir @@ -121,8 +121,8 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_1 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32) ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) @@ -143,8 +143,8 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_31 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32) ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) @@ -165,8 +165,8 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_32 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; 
CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32) ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32) ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) @@ -186,12 +186,55 @@ body: | ; CHECK-LABEL: name: sext_inreg_v_s64_33 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32) - ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32) - ; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1 + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 33 S_ENDPGM 0, implicit %1 ... + +--- +name: sext_inreg_v_s64_35 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: sext_inreg_v_s64_35 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3 + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_SEXT_INREG %0, 35 + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: sext_inreg_v_s64_63 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: sext_inreg_v_s64_63 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31 + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_SEXT_INREG %0, 63 + S_ENDPGM 0, implicit %1 + +...