
GlobalISel: Lower funnel shifts

Matt Arsenault 2020-03-19 22:48:13 -04:00
parent 53f4011707
commit c1e5a01132
10 changed files with 17927 additions and 75 deletions


@@ -345,6 +345,9 @@ public:
  LegalizeResult lowerLoad(MachineInstr &MI);
  LegalizeResult lowerStore(MachineInstr &MI);
  LegalizeResult lowerBitCount(MachineInstr &MI);
  LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
  LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
  LegalizeResult lowerFunnelShift(MachineInstr &MI);
  LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
  LegalizeResult lowerUITOFP(MachineInstr &MI);


@@ -1444,6 +1444,13 @@ public:
    return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
  }

  /// Build and insert \p Res = G_UREM \p Op0, \p Op1
  MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0,
                                const SrcOp &Src1,
                                Optional<unsigned> Flags = None) {
    return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags);
  }

  MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
                                const SrcOp &Src1,
                                Optional<unsigned> Flags = None) {


@@ -328,6 +328,13 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
                                       const MachineRegisterInfo &MRI);

/// Attempt to match a unary predicate against a scalar/splat constant or every
/// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source
/// value was undef.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg,
                         std::function<bool(const Constant *ConstVal)> Match,
                         bool AllowUndefs = false);

/// Returns true if given the TargetLowering's boolean contents information,
/// the value \p Val contains a true value.
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,


@@ -3210,6 +3210,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
  case G_FSHL:
  case G_FSHR:
    return lowerFunnelShift(MI);
  }
}
@@ -5207,6 +5210,132 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}
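To make the predicate concrete, here is a minimal stand-alone sketch (an editor's illustration, not part of the commit) that mirrors the same logic on plain integers, with an empty optional standing in for an undef shift amount:

#include <cstdint>
#include <optional>

// Accept an undef amount, or any constant amount that is not an exact
// multiple of the bit width.
static bool nonZeroModBWOrUndef(std::optional<uint64_t> Z, unsigned BW) {
  return !Z.has_value() || (*Z % BW) != 0;
}

// For BW = 8: nonZeroModBWOrUndef(3, 8) and nonZeroModBWOrUndef({}, 8) are
// true, while nonZeroModBWOrUndef(16, 8) is false, so the -Z rewrite in
// lowerFunnelShiftWithInverse below would not be used.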

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}
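The two rewrites above can be sanity-checked on 8-bit values with a small self-contained program (an editor's sketch, not commit code); fshl8 and fshr8 are reference implementations of the generic opcodes with the shift amount reduced modulo the bit width:

#include <cassert>
#include <cstdint>

static uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned C = Z % 8;
  return C ? (uint8_t)((X << C) | (Y >> (8 - C))) : X;
}
static uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned C = Z % 8;
  return C ? (uint8_t)((X << (8 - C)) | (Y >> C)) : Y;
}

int main() {
  uint8_t X = 0xAB, Y = 0xCD;
  // First branch: when Z % BW is known to be nonzero, fshl X, Y, Z equals
  // fshr X, Y, -Z (and -Z modulo 8 is 8 - Z here).
  for (unsigned Z = 1; Z < 8; ++Z)
    assert(fshl8(X, Y, Z) == fshr8(X, Y, 8 - Z));
  // Second branch: fshl X, Y, Z equals fshr (srl X, 1), (fshr X, Y, 1), ~Z,
  // which holds even when Z % BW may be zero.
  for (unsigned Z = 0; Z < 16; ++Z)
    assert(fshl8(X, Y, Z) == fshr8((uint8_t)(X >> 1), fshr8(X, Y, 1), ~Z));
  return 0;
}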

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}
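The power-of-two branch above can likewise be written out directly (editor's sketch, not commit code). Splitting the right shift into a shift by one followed by a shift by InvShAmt keeps every shift amount strictly below the bit width, even when Z % BW == 0:

#include <cassert>
#include <cstdint>

// fshl on u8 using only shl/lshr/and/not/or, mirroring the expansion above.
static uint8_t fshl8_as_shifts(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned ShAmt = Z & 7;     // Z % BW, since BW = 8 is a power of two
  unsigned InvShAmt = ~Z & 7; // (BW - 1) - (Z % BW)
  return (uint8_t)((X << ShAmt) | ((uint8_t)(Y >> 1) >> InvShAmt));
}

int main() {
  for (unsigned Z = 0; Z < 16; ++Z) {
    unsigned C = Z % 8;
    uint8_t Ref = C ? (uint8_t)((0xAB << C) | (0xCD >> (8 - C))) : 0xAB;
    assert(fshl8_as_shifts(0xAB, 0xCD, Z) == Ref);
  }
  return 0;
}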

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);
  return lowerFunnelShiftWithInverse(MI);
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult


@@ -926,6 +926,38 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
  return RegOrConstant(Reg);
}

bool llvm::matchUnaryPredicate(
    const MachineRegisterInfo &MRI, Register Reg,
    std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {

  const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    return Match(nullptr);

  // TODO: Also handle fconstant
  if (Def->getOpcode() == TargetOpcode::G_CONSTANT)
    return Match(Def->getOperand(1).getCImm());

  if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return false;

  for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
    Register SrcElt = Def->getOperand(I).getReg();
    const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
    if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
      if (!Match(nullptr))
        return false;
      continue;
    }

    if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT ||
        !Match(SrcDef->getOperand(1).getCImm()))
      return false;
  }

  return true;
}
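Within this commit the only caller is isNonZeroModBitWidthOrUndef above, but the helper is generic; a hypothetical call site (illustrative names, not from the commit) that checks whether every defined element of Reg is a power of two, tolerating undef elements, would look like:

bool AllPow2 = llvm::matchUnaryPredicate(
    MRI, Reg,
    [](const Constant *C) {
      // A null Constant means an undef element; accept it for this query.
      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
      return !CI || CI->getValue().isPowerOf2();
    },
    /*AllowUndefs=*/true);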

bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
                          bool IsFP) {
  switch (TLI.getBooleanContents(IsVector, IsFP)) {


@@ -1595,11 +1595,26 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
    .clampScalar(0, S32, S64)
    .lower();

  // TODO: Only try to form v2s16 with legal packed instructions.
  getActionDefinitionsBuilder(G_FSHR)
    .legalFor({{S32, S32}})
    .lowerFor({{V2S16, V2S16}})
    .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
    .scalarize(0)
    .lower();

  if (ST.hasVOP3PInsts()) {
    getActionDefinitionsBuilder(G_FSHL)
      .lowerFor({{V2S16, V2S16}})
      .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
      .scalarize(0)
      .lower();
  } else {
    getActionDefinitionsBuilder(G_FSHL)
      .scalarize(0)
      .lower();
  }

  getActionDefinitionsBuilder(G_READCYCLECOUNTER)
    .legalFor({S64});
@@ -1624,9 +1639,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       G_SADDO, G_SSUBO,

       // TODO: Implement
-      G_FMINIMUM, G_FMAXIMUM,
-      G_FSHL
-    }).lower();
+      G_FMINIMUM, G_FMAXIMUM}).lower();

  getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
                               G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,

The remaining 4 file diffs are suppressed because they are too large.