mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
GlobalISel: Lower funnel shifts
This commit is contained in:
parent
53f4011707
commit
c1e5a01132
@ -345,6 +345,9 @@ public:
|
||||
LegalizeResult lowerLoad(MachineInstr &MI);
|
||||
LegalizeResult lowerStore(MachineInstr &MI);
|
||||
LegalizeResult lowerBitCount(MachineInstr &MI);
|
||||
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
|
||||
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
|
||||
LegalizeResult lowerFunnelShift(MachineInstr &MI);
|
||||
|
||||
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
|
||||
LegalizeResult lowerUITOFP(MachineInstr &MI);
|
||||
|
@ -1444,6 +1444,13 @@ public:
|
||||
return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
|
||||
}
|
||||
|
||||
/// Build and insert \p Res = G_UREM \p Op0, \p Op1
|
||||
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0,
|
||||
const SrcOp &Src1,
|
||||
Optional<unsigned> Flags = None) {
|
||||
return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags);
|
||||
}
|
||||
|
||||
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
|
||||
const SrcOp &Src1,
|
||||
Optional<unsigned> Flags = None) {
|
||||
|
@ -328,6 +328,13 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
|
||||
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Attempt to match a unary predicate against a scalar/splat constant or every
|
||||
/// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source
|
||||
/// value was undef.
|
||||
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg,
|
||||
std::function<bool(const Constant *ConstVal)> Match,
|
||||
bool AllowUndefs = false);
|
||||
|
||||
/// Returns true if given the TargetLowering's boolean contents information,
|
||||
/// the value \p Val contains a true value.
|
||||
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
|
@ -3210,6 +3210,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
|
||||
case G_SDIVREM:
|
||||
case G_UDIVREM:
|
||||
return lowerDIVREM(MI);
|
||||
case G_FSHL:
|
||||
case G_FSHR:
|
||||
return lowerFunnelShift(MI);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5207,6 +5210,132 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check that (every element of) Reg is undef or not an exact multiple of BW.
|
||||
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
|
||||
Register Reg, unsigned BW) {
|
||||
return matchUnaryPredicate(
|
||||
MRI, Reg,
|
||||
[=](const Constant *C) {
|
||||
// Null constant here means an undef.
|
||||
const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
|
||||
return !CI || CI->getValue().urem(BW) != 0;
|
||||
},
|
||||
/*AllowUndefs*/ true);
|
||||
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  // Lower a funnel shift by re-expressing it via the opposite-direction
  // funnel shift, which the caller has determined the target handles better.
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    //
    // Only valid because Z % BW is known to be nonzero: fshl(X, Y, 0) == X
    // but fshr(X, Y, 0) == Y, so a zero amount cannot simply be negated.
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    // The negated amount is a shift amount, so it must have the shift-amount
    // type ShTy (the original used the value type Ty here, producing a
    // type-inconsistent G_SUB whenever Ty != ShTy).
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    //
    // Pre-shifting the operands by one lets ~Z, i.e. BW - 1 - (Z % BW),
    // serve as the remaining amount; it is always < BW, so well defined.
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  // Expand a funnel shift into plain shifts plus an OR, taking care never to
  // emit a shift by BW (which would be undefined).
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  // The two halves that are OR'd together, and the shift amounts producing
  // them.
  Register ShiftedX, ShiftedY;
  Register Amt, InvAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero, so neither shift amount can reach BW.
    auto BWConst = MIRBuilder.buildConstant(ShTy, BW);
    Amt = MIRBuilder.buildURem(ShTy, Z, BWConst).getReg(0);
    InvAmt = MIRBuilder.buildSub(ShTy, BWConst, Amt).getReg(0);
    ShiftedX = MIRBuilder.buildShl(Ty, X, IsFSHL ? Amt : InvAmt).getReg(0);
    ShiftedY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvAmt : Amt).getReg(0);
  } else {
    // Z % BW may be zero, so split the inverse shift into a fixed shift by
    // one plus a shift by BW - 1 - (Z % BW); both amounts stay below BW:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto BWMinus1 = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      Amt = MIRBuilder.buildAnd(ShTy, Z, BWMinus1).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto InvZ = MIRBuilder.buildNot(ShTy, Z);
      InvAmt = MIRBuilder.buildAnd(ShTy, InvZ, BWMinus1).getReg(0);
    } else {
      auto BWConst = MIRBuilder.buildConstant(ShTy, BW);
      Amt = MIRBuilder.buildURem(ShTy, Z, BWConst).getReg(0);
      InvAmt = MIRBuilder.buildSub(ShTy, BWMinus1, Amt).getReg(0);
    }

    auto OneC = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShiftedX = MIRBuilder.buildShl(Ty, X, Amt).getReg(0);
      auto PreShY = MIRBuilder.buildLShr(Ty, Y, OneC);
      ShiftedY = MIRBuilder.buildLShr(Ty, PreShY, InvAmt).getReg(0);
    } else {
      auto PreShX = MIRBuilder.buildShl(Ty, X, OneC);
      ShiftedX = MIRBuilder.buildShl(Ty, PreShX, InvAmt).getReg(0);
      ShiftedY = MIRBuilder.buildLShr(Ty, Y, Amt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShiftedX, ShiftedY);
  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  // Prefer rewriting in terms of the opposite-direction funnel shift when the
  // target does not also just lower that one; otherwise expand to plain
  // shifts and an OR.
  unsigned RevOpcode = MI.getOpcode() == TargetOpcode::G_FSHL
                           ? TargetOpcode::G_FSHR
                           : TargetOpcode::G_FSHL;
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);
  return lowerFunnelShiftWithInverse(MI);
}
|
||||
|
||||
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
|
||||
// representation.
|
||||
LegalizerHelper::LegalizeResult
|
||||
|
@ -926,6 +926,38 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
|
||||
return RegOrConstant(Reg);
|
||||
}
|
||||
|
||||
bool llvm::matchUnaryPredicate(
|
||||
const MachineRegisterInfo &MRI, Register Reg,
|
||||
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
|
||||
|
||||
const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
|
||||
if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
|
||||
return Match(nullptr);
|
||||
|
||||
// TODO: Also handle fconstant
|
||||
if (Def->getOpcode() == TargetOpcode::G_CONSTANT)
|
||||
return Match(Def->getOperand(1).getCImm());
|
||||
|
||||
if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
|
||||
return false;
|
||||
|
||||
for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
|
||||
Register SrcElt = Def->getOperand(I).getReg();
|
||||
const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
|
||||
if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
|
||||
if (!Match(nullptr))
|
||||
return false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT ||
|
||||
!Match(SrcDef->getOperand(1).getCImm()))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
bool IsFP) {
|
||||
switch (TLI.getBooleanContents(IsVector, IsFP)) {
|
||||
|
@ -1595,11 +1595,26 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
.clampScalar(0, S32, S64)
|
||||
.lower();
|
||||
|
||||
// TODO: Only Try to form v2s16 with legal packed instructions.
|
||||
getActionDefinitionsBuilder(G_FSHR)
|
||||
.legalFor({{S32, S32}})
|
||||
.lowerFor({{V2S16, V2S16}})
|
||||
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
|
||||
.scalarize(0)
|
||||
.lower();
|
||||
|
||||
if (ST.hasVOP3PInsts()) {
|
||||
getActionDefinitionsBuilder(G_FSHL)
|
||||
.lowerFor({{V2S16, V2S16}})
|
||||
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
|
||||
.scalarize(0)
|
||||
.lower();
|
||||
} else {
|
||||
getActionDefinitionsBuilder(G_FSHL)
|
||||
.scalarize(0)
|
||||
.lower();
|
||||
}
|
||||
|
||||
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
|
||||
.legalFor({S64});
|
||||
|
||||
@ -1624,9 +1639,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
G_SADDO, G_SSUBO,
|
||||
|
||||
// TODO: Implement
|
||||
G_FMINIMUM, G_FMAXIMUM,
|
||||
G_FSHL
|
||||
}).lower();
|
||||
G_FMINIMUM, G_FMAXIMUM}).lower();
|
||||
|
||||
getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
|
||||
G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
|
||||
|
7474
test/CodeGen/AMDGPU/GlobalISel/fshl.ll
Normal file
7474
test/CodeGen/AMDGPU/GlobalISel/fshl.ll
Normal file
File diff suppressed because it is too large
Load Diff
7572
test/CodeGen/AMDGPU/GlobalISel/fshr.ll
Normal file
7572
test/CodeGen/AMDGPU/GlobalISel/fshr.ll
Normal file
File diff suppressed because it is too large
Load Diff
1254
test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
Normal file
1254
test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user