1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

AMDGPU/GlobalISel: Custom lower 32-bit G_SDIV/G_SREM

This commit is contained in:
Matt Arsenault 2020-02-11 20:48:45 -05:00 committed by Matt Arsenault
parent 179e6ab402
commit 4547afd953
6 changed files with 4009 additions and 238 deletions

View File

@ -1354,6 +1354,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
return legalizeUDIV_UREM(MI, MRI, B);
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
return legalizeSDIV_SREM(MI, MRI, B);
case TargetOpcode::G_ATOMIC_CMPXCHG:
return legalizeAtomicCmpXChg(MI, MRI, B);
case TargetOpcode::G_FLOG:
@ -2329,19 +2332,14 @@ static Register buildDivRCP(MachineIRBuilder &B, Register Src) {
return B.buildFPTOUI(S32, Mul).getReg(0);
}
bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
B.setInstr(MI);
bool IsRem = MI.getOpcode() == AMDGPU::G_UREM;
void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
Register DstReg,
Register Num,
Register Den,
bool IsRem) const {
const LLT S1 = LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
Register DstReg = MI.getOperand(0).getReg();
Register Num = MI.getOperand(1).getReg();
Register Den = MI.getOperand(2).getReg();
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
auto RCP = buildDivRCP(B, Den);
@ -2422,7 +2420,17 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
} else {
B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One);
}
}
// MachineInstr entry point for the 32-bit unsigned divide/remainder lowering.
//
// Positions the builder at MI, forwards MI's destination and two source
// registers to legalizeUDIV_UREM32Impl (requesting the remainder when MI is a
// G_UREM), then erases MI. Always returns true.
bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B) const {
  // New instructions are inserted relative to the instruction being replaced.
  B.setInstr(MI);

  const bool WantRemainder = MI.getOpcode() == AMDGPU::G_UREM;
  Register Result = MI.getOperand(0).getReg();
  Register Numerator = MI.getOperand(1).getReg();
  Register Denominator = MI.getOperand(2).getReg();

  legalizeUDIV_UREM32Impl(B, Result, Numerator, Denominator, WantRemainder);

  // The expansion above defines Result, so the original instruction is dead.
  MI.eraseFromParent();
  return true;
}
@ -2435,6 +2443,52 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI,
return false;
}
// Custom-lowers a 32-bit G_SDIV / G_SREM in terms of the unsigned expansion.
//
// Both operands are replaced by their absolute values, the unsigned quotient
// or remainder is built with legalizeUDIV_UREM32Impl, and the sign is then
// re-applied to the result:
//   - remainder: takes the sign of the dividend (LHS);
//   - quotient:  negative iff the operand signs differ.
//
// MI is erased once the replacement sequence has been built. Always returns
// true.
bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B) const {
  B.setInstr(MI);
  const LLT S32 = LLT::scalar(32);

  const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM;
  Register DstReg = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  // An arithmetic shift right by 31 smears the sign bit across the word:
  // 0 for a non-negative value, -1 (all ones) for a negative value.
  auto ThirtyOne = B.buildConstant(S32, 31);
  auto LHSign = B.buildAShr(S32, LHS, ThirtyOne);
  // The divisor's sign mask must come from RHS itself; deriving it from LHS
  // would yield a wrong |RHS| and a wrong quotient sign whenever the operand
  // signs differ.
  auto RHSign = B.buildAShr(S32, RHS, ThirtyOne);

  // |x| = (x + sign) ^ sign. With sign == 0 this is a no-op; with sign == -1
  // it computes ~(x - 1) == -x.
  LHS = B.buildAdd(S32, LHS, LHSign).getReg(0);
  RHS = B.buildAdd(S32, RHS, RHSign).getReg(0);

  LHS = B.buildXor(S32, LHS, LHSign).getReg(0);
  RHS = B.buildXor(S32, RHS, RHSign).getReg(0);

  // Unsigned divide/remainder of the magnitudes into a fresh virtual register.
  Register UDivRem = MRI.createGenericVirtualRegister(S32);
  legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsRem);

  // Conditionally negate the unsigned result: (u ^ sign) - sign is u when
  // sign == 0 and -u when sign == -1.
  if (IsRem) {
    auto RSign = LHSign; // Remainder sign is the same as LHS
    UDivRem = B.buildXor(S32, UDivRem, RSign).getReg(0);
    B.buildSub(DstReg, UDivRem, RSign);
  } else {
    // Quotient is negative exactly when the operand signs differ.
    auto DSign = B.buildXor(S32, LHSign, RHSign);
    UDivRem = B.buildXor(S32, UDivRem, DSign).getReg(0);
    B.buildSub(DstReg, UDivRem, DSign);
  }

  MI.eraseFromParent();
  return true;
}
// Dispatches custom lowering of G_SDIV / G_SREM on the type of the result
// register (operand 0). Only a 32-bit scalar result is handled, by forwarding
// to legalizeSDIV_SREM32; every other type returns false without touching MI.
bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {
  const Register ResultReg = MI.getOperand(0).getReg();
  const bool IsScalar32 = MRI.getType(ResultReg) == LLT::scalar(32);

  // No lowering is provided here for any other result type.
  if (!IsScalar32)
    return false;

  return legalizeSDIV_SREM32(MI, MRI, B);
}
bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {

View File

@ -99,9 +99,18 @@ public:
// Custom legalization hook invoked for G_UDIV / G_UREM.
bool legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
// Builds the 32-bit unsigned divide/remainder expansion of Num by Den at the
// builder's current insertion point, writing the result to DstReg. IsRem
// selects the remainder instead of the quotient. Does not take or erase a
// MachineInstr, so it can be shared by the unsigned and signed lowerings.
void legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
Register DstReg, Register Num, Register Den,
bool IsRem) const;
// Lowers a 32-bit G_UDIV / G_UREM instruction via legalizeUDIV_UREM32Impl and
// erases MI.
bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
// Lowers a 32-bit G_SDIV / G_SREM instruction on top of the unsigned
// expansion and erases MI.
bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
// Custom legalization hook invoked for G_SDIV / G_SREM; handles only a 32-bit
// scalar result and returns false for any other type.
bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff