
AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG

We don't need to expand to full shifts for the > 32-bit case. This
just switches to a sext_inreg of the high half.
Matt Arsenault 2020-01-25 09:31:09 -05:00
parent acf5f4aa9b
commit d15b15865e
2 changed files with 68 additions and 56 deletions
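As a cross-check on the split the commit message describes, here is a minimal standalone C++ sketch (illustrative only, not part of the patch; every name in it is invented): for amounts up to 32, sign-extend within the low half and fill the high half with an arithmetic shift right by 31; for larger amounts, keep the low half and sign-extend within the high half.

// Hypothetical reference for the 64-bit split; not LLVM code.
// Assumes >> on a negative signed value is an arithmetic shift,
// which holds on every host compiler LLVM supports.
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Reference: sign-extend the low B bits of X to 64 bits (1 <= B <= 63).
static int64_t sextInReg64(uint64_t X, int B) {
  int S = 64 - B;
  return (int64_t)(X << S) >> S;
}

// Same, within 32 bits (1 <= B <= 31).
static int32_t sextInReg32(uint32_t X, int B) {
  int S = 32 - B;
  return (int32_t)(X << S) >> S;
}

// The split form: only 32-bit operations on the two halves.
static int64_t sextInReg64Split(uint64_t X, int B) {
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  uint32_t ResLo, ResHi;
  if (B <= 32) {
    // Extend in the low bits (a plain copy when B == 32), then
    // propagate the low half's sign bit across the whole high half.
    ResLo = B == 32 ? Lo : (uint32_t)sextInReg32(Lo, B);
    ResHi = (uint32_t)((int32_t)ResLo >> 31);
  } else {
    // The low bits are unchanged; sign-extend within the high half.
    ResLo = Lo;
    ResHi = (uint32_t)sextInReg32(Hi, B - 32);
  }
  return (int64_t)(((uint64_t)ResHi << 32) | ResLo);
}

int main() {
  for (int B = 1; B < 64; ++B)
    for (uint64_t X : {0x0000000180000000ULL, 0xDEADBEEFCAFEBABEULL, ~0ULL})
      assert(sextInReg64Split(X, B) == sextInReg64(X, B));
}

Under these assumptions the asserts pass for every amount, which is why the patch can avoid 64-bit shifts entirely: neither case ever needs a value wider than 32 bits.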

lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

@@ -1799,53 +1799,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     return;
   }
   case AMDGPU::G_SEXT_INREG: {
-    const RegisterBank *SrcBank =
-      OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
-    // We can directly handle all 64-bit cases with s_bfe_i64.
-    if (SrcBank == &AMDGPU::SGPRRegBank)
-      break;
+    SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+    if (SrcRegs.empty())
+      break; // Nothing to repair
 
     const LLT S32 = LLT::scalar(32);
-    Register DstReg = MI.getOperand(0).getReg();
-    Register SrcReg = MI.getOperand(1).getReg();
-    LLT Ty = MRI.getType(DstReg);
-    if (Ty == S32)
-      break;
-
     MachineIRBuilder B(MI);
     ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
     GISelObserverWrapper Observer(&O);
     B.setChangeObserver(Observer);
 
-    int Amt = MI.getOperand(2).getImm();
-    if (Amt <= 32) {
-      // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
-      // we would need to further expand, and doesn't let us directly set the
-      // result registers.
-      SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
-      SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
-      if (SrcRegs.empty())
-        split64BitValueForMapping(B, SrcRegs, S32, SrcReg);
-      // Extend in the low bits and propagate the sign bit to the high half.
-      auto ShiftAmt = B.buildConstant(S32, 31);
+    // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
+    // we would need to further expand, and doesn't let us directly set the
+    // result registers.
+    SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+
+    int Amt = MI.getOperand(2).getImm();
+    if (Amt <= 32) {
       if (Amt == 32) {
         // The low bits are unchanged.
         B.buildCopy(DstRegs[0], SrcRegs[0]);
-        B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
       } else {
         // Extend in the low bits and propagate the sign bit to the high half.
         B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
-        B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
       }
-    } else {
-      assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1)));
-      const LLT S64 = LLT::scalar(64);
-      // This straddles two registers. Expand with 64-bit shifts.
-      auto ShiftAmt = B.buildConstant(S32, 64 - Amt);
-      auto Shl = B.buildShl(S64, SrcReg, ShiftAmt);
-      B.buildAShr(DstReg, Shl, ShiftAmt);
-    }
+
+      B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
+    } else {
+      // The low bits are unchanged, and extend in the high bits.
+      B.buildCopy(DstRegs[0], SrcRegs[0]);
+      B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
+    }
 
+    Register DstReg = MI.getOperand(0).getReg();
     MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
     MI.eraseFromParent();
     return;
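The regbankselect test changes further down make this concrete. For G_SEXT_INREG %x(s64), 33 on VGPRs, the old expansion shifted the full 64-bit value, while the new one touches only 32-bit halves (a paraphrase of the checks below, with value names simplified for readability):

; old: 64-bit shift pair
%c:vgpr(s32) = G_CONSTANT i32 31
%shl:vgpr(s64) = G_SHL %x, %c(s32)
%r:vgpr(s64) = G_ASHR %shl, %c(s32)

; new: 32-bit pieces only
%lo:vgpr(s32), %hi:vgpr(s32) = G_UNMERGE_VALUES %x(s64)
%rlo:vgpr(s32) = COPY %lo(s32)
%rhi:vgpr(s32) = G_SEXT_INREG %rlo, 1
%r:vgpr(s64) = G_MERGE_VALUES %rlo(s32), %rhi(s32)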
@@ -2965,7 +2951,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
   case AMDGPU::G_ZEXT:
   case AMDGPU::G_SEXT:
-  case AMDGPU::G_ANYEXT: {
+  case AMDGPU::G_ANYEXT:
+  case AMDGPU::G_SEXT_INREG: {
     Register Dst = MI.getOperand(0).getReg();
     Register Src = MI.getOperand(1).getReg();
     unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -2996,24 +2983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     }
     break;
   }
-  case AMDGPU::G_SEXT_INREG: {
-    Register Dst = MI.getOperand(0).getReg();
-    Register Src = MI.getOperand(1).getReg();
-    Register Amt = MI.getOperand(2).getImm();
-    unsigned Size = getSizeInBits(Dst, MRI, *TRI);
-    unsigned BankID = getRegBank(Src, MRI, *TRI)->getID();
-
-    if (Amt <= 32) {
-      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
-    } else {
-      // If we need to expand a 64 bit for the VALU, this will straddle two
-      // registers. Just expand this with 64-bit shifts.
-      OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size);
-    }
-
-    OpdsMapping[1] = OpdsMapping[0];
-    break;
-  }
   case AMDGPU::G_FCMP: {
     unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);

test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir

@@ -121,8 +121,8 @@ body: |
     ; CHECK-LABEL: name: sext_inreg_v_s64_1
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -143,8 +143,8 @@ body: |
     ; CHECK-LABEL: name: sext_inreg_v_s64_31
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -165,8 +165,8 @@ body: |
     ; CHECK-LABEL: name: sext_inreg_v_s64_32
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
     ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
     ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -186,12 +186,55 @@ body: |
     ; CHECK-LABEL: name: sext_inreg_v_s64_33
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXT_INREG %0, 33
     S_ENDPGM 0, implicit %1
 ...
+---
+name: sext_inreg_v_s64_35
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: sext_inreg_v_s64_35
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_SEXT_INREG %0, 35
+    S_ENDPGM 0, implicit %1
+...
+---
+name: sext_inreg_v_s64_63
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: sext_inreg_v_s64_63
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_SEXT_INREG %0, 63
+    S_ENDPGM 0, implicit %1
+...