mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG
We don't need to expand to full 64-bit shifts for the > 32-bit case. Instead, the low half is copied unchanged and the extension is done with a 32-bit sext_inreg for the high half.
This commit is contained in:
parent
acf5f4aa9b
commit
d15b15865e
@ -1799,53 +1799,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_SEXT_INREG: {
|
||||
const RegisterBank *SrcBank =
|
||||
OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
|
||||
|
||||
// We can directly handle all 64-bit cases with s_bfe_i64.
|
||||
if (SrcBank == &AMDGPU::SGPRRegBank)
|
||||
break;
|
||||
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
|
||||
if (SrcRegs.empty())
|
||||
break; // Nothing to repair
|
||||
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
Register SrcReg = MI.getOperand(1).getReg();
|
||||
LLT Ty = MRI.getType(DstReg);
|
||||
if (Ty == S32)
|
||||
break;
|
||||
|
||||
MachineIRBuilder B(MI);
|
||||
ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
|
||||
GISelObserverWrapper Observer(&O);
|
||||
B.setChangeObserver(Observer);
|
||||
|
||||
// Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
|
||||
// we would need to further expand, and doesn't let us directly set the
|
||||
// result registers.
|
||||
SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
|
||||
|
||||
int Amt = MI.getOperand(2).getImm();
|
||||
if (Amt <= 32) {
|
||||
// Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
|
||||
// we would need to further expand, and doesn't let us directly set the
|
||||
// result registers.
|
||||
SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
|
||||
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
|
||||
|
||||
if (SrcRegs.empty())
|
||||
split64BitValueForMapping(B, SrcRegs, S32, SrcReg);
|
||||
// Extend in the low bits and propagate the sign bit to the high half.
|
||||
auto ShiftAmt = B.buildConstant(S32, 31);
|
||||
if (Amt == 32) {
|
||||
// The low bits are unchanged.
|
||||
B.buildCopy(DstRegs[0], SrcRegs[0]);
|
||||
B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
|
||||
} else {
|
||||
// Extend in the low bits and propagate the sign bit to the high half.
|
||||
B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
|
||||
B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
|
||||
}
|
||||
|
||||
B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
|
||||
} else {
|
||||
assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1)));
|
||||
const LLT S64 = LLT::scalar(64);
|
||||
// This straddles two registers. Expand with 64-bit shifts.
|
||||
auto ShiftAmt = B.buildConstant(S32, 64 - Amt);
|
||||
auto Shl = B.buildShl(S64, SrcReg, ShiftAmt);
|
||||
B.buildAShr(DstReg, Shl, ShiftAmt);
|
||||
// The low bits are unchanged, and extend in the high bits.
|
||||
B.buildCopy(DstRegs[0], SrcRegs[0]);
|
||||
B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
|
||||
}
|
||||
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
|
||||
MI.eraseFromParent();
|
||||
return;
|
||||
@ -2965,7 +2951,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
}
|
||||
case AMDGPU::G_ZEXT:
|
||||
case AMDGPU::G_SEXT:
|
||||
case AMDGPU::G_ANYEXT: {
|
||||
case AMDGPU::G_ANYEXT:
|
||||
case AMDGPU::G_SEXT_INREG: {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
Register Src = MI.getOperand(1).getReg();
|
||||
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
|
||||
@ -2996,24 +2983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_SEXT_INREG: {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
Register Src = MI.getOperand(1).getReg();
|
||||
Register Amt = MI.getOperand(2).getImm();
|
||||
unsigned Size = getSizeInBits(Dst, MRI, *TRI);
|
||||
unsigned BankID = getRegBank(Src, MRI, *TRI)->getID();
|
||||
|
||||
if (Amt <= 32) {
|
||||
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
|
||||
} else {
|
||||
// If we need to expand a 64 bit for the VALU, this will straddle two
|
||||
// registers. Just expand this with 64-bit shifts.
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size);
|
||||
}
|
||||
|
||||
OpdsMapping[1] = OpdsMapping[0];
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_FCMP: {
|
||||
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
|
||||
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
|
@ -121,8 +121,8 @@ body: |
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_1
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
@ -143,8 +143,8 @@ body: |
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
@ -165,8 +165,8 @@ body: |
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_32
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
@ -186,12 +186,55 @@ body: |
|
||||
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_33
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64)
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_SEXT_INREG %0, 33
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: sext_inreg_v_s64_35
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_35
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_SEXT_INREG %0, 35
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: sext_inreg_v_s64_63
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; CHECK-LABEL: name: sext_inreg_v_s64_63
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31
|
||||
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
|
||||
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_SEXT_INREG %0, 63
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user