mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
Assume lane masks are always precise
Allowing imprecise lane masks in case of more than 32 sub register lanes lead to some tricky corner cases, and I need another bugfix for another one. Instead I rather declare lane masks as precise and let tablegen abort if we do not have enough bits. This does not affect any in-tree target, even AMDGPU only needs 16 lanes at the moment. If the 32 lanes turn out to be a problem in the future, then we can easily change the LaneBitmask typedef to uint64_t. Differential Revision: http://reviews.llvm.org/D14557 llvm-svn: 253279
This commit is contained in:
parent
11eb1233c6
commit
088a172bf6
@ -35,24 +35,21 @@ class VirtRegMap;
|
||||
class raw_ostream;
|
||||
class LiveRegMatrix;
|
||||
|
||||
/// A bitmask representing the parts of a register are alive.
|
||||
/// A bitmask representing the covering of a register with sub-registers.
|
||||
///
|
||||
/// This is typically used to track liveness at sub-register granularity.
|
||||
/// Lane masks for sub-register indices are similar to register units for
|
||||
/// physical registers. The individual bits in a lane mask can't be assigned
|
||||
/// any specific meaning. They can be used to check if two sub-register
|
||||
/// indices overlap.
|
||||
///
|
||||
/// If the target has a register such that:
|
||||
/// Iff the target has a register such that:
|
||||
///
|
||||
/// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
|
||||
///
|
||||
/// then:
|
||||
///
|
||||
/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
|
||||
///
|
||||
/// The converse is not necessarily true. If two lane masks have a common
|
||||
/// bit, the corresponding sub-registers may not overlap, but it can be
|
||||
/// assumed that they usually will.
|
||||
typedef unsigned LaneBitmask;
|
||||
|
||||
class TargetRegisterClass {
|
||||
@ -369,19 +366,6 @@ public:
|
||||
return SubRegIndexLaneMasks[SubIdx];
|
||||
}
|
||||
|
||||
/// Returns true if the given lane mask is imprecise.
|
||||
///
|
||||
/// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of
|
||||
/// bits, so for targets with more than 31 disjunct subregister indices there
|
||||
/// may be cases where:
|
||||
/// getSubReg(Reg,A) does not overlap getSubReg(Reg,B)
|
||||
/// but we still have
|
||||
/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0.
|
||||
/// This function returns true in those cases.
|
||||
static bool isImpreciseLaneMask(LaneBitmask LaneMask) {
|
||||
return LaneMask & 0x80000000u;
|
||||
}
|
||||
|
||||
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
|
||||
/// used to determine if sub-registers overlap - they can't be used to
|
||||
/// determine if a set of sub-registers completely cover another
|
||||
|
@ -163,14 +163,12 @@ namespace {
|
||||
/// LaneMask are split as necessary. @p LaneMask are the lanes that
|
||||
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
|
||||
/// lanemasks already adjusted to the coalesced register.
|
||||
/// @returns false if live range conflicts couldn't get resolved.
|
||||
bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
|
||||
void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
|
||||
LaneBitmask LaneMask, CoalescerPair &CP);
|
||||
|
||||
/// Join the liveranges of two subregisters. Joins @p RRange into
|
||||
/// @p LRange, @p RRange may be invalid afterwards.
|
||||
/// @returns false if live range conflicts couldn't get resolved.
|
||||
bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
LaneBitmask LaneMask, const CoalescerPair &CP);
|
||||
|
||||
/// We found a non-trivially-coalescable copy. If the source value number is
|
||||
@ -2469,7 +2467,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
|
||||
}
|
||||
}
|
||||
|
||||
bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
LaneBitmask LaneMask,
|
||||
const CoalescerPair &CP) {
|
||||
SmallVector<VNInfo*, 16> NewVNInfo;
|
||||
@ -2484,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
// ranges get mapped to the "overflow" lane mask bit which creates unexpected
|
||||
// interferences.
|
||||
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
|
||||
DEBUG(dbgs() << "*** Couldn't join subrange!\n");
|
||||
return false;
|
||||
// We already determined that it is legal to merge the intervals, so this
|
||||
// should never fail.
|
||||
llvm_unreachable("*** Couldn't join subrange!\n");
|
||||
}
|
||||
if (!LHSVals.resolveConflicts(RHSVals) ||
|
||||
!RHSVals.resolveConflicts(LHSVals)) {
|
||||
DEBUG(dbgs() << "*** Couldn't join subrange!\n");
|
||||
return false;
|
||||
// We already determined that it is legal to merge the intervals, so this
|
||||
// should never fail.
|
||||
llvm_unreachable("*** Couldn't join subrange!\n");
|
||||
}
|
||||
|
||||
// The merging algorithm in LiveInterval::join() can't handle conflicting
|
||||
@ -2513,17 +2513,16 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
|
||||
|
||||
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
|
||||
if (EndPoints.empty())
|
||||
return true;
|
||||
return;
|
||||
|
||||
// Recompute the parts of the live range we had to remove because of
|
||||
// CR_Replace conflicts.
|
||||
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
|
||||
<< " points: " << LRange << '\n');
|
||||
LIS->extendToIndices(LRange, EndPoints);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
|
||||
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
|
||||
const LiveRange &ToMerge,
|
||||
LaneBitmask LaneMask,
|
||||
CoalescerPair &CP) {
|
||||
@ -2553,8 +2552,7 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
|
||||
CommonRange = &R;
|
||||
}
|
||||
LiveRange RangeCopy(ToMerge, Allocator);
|
||||
if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
|
||||
return false;
|
||||
joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
|
||||
LaneMask &= ~RMask;
|
||||
}
|
||||
|
||||
@ -2562,7 +2560,6 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
|
||||
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
|
||||
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
|
||||
@ -2613,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
|
||||
|
||||
// Determine lanemasks of RHS in the coalesced register and merge subranges.
|
||||
unsigned SrcIdx = CP.getSrcIdx();
|
||||
bool Abort = false;
|
||||
if (!RHS.hasSubRanges()) {
|
||||
LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
|
||||
: TRI->getSubRegIndexLaneMask(SrcIdx);
|
||||
if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
|
||||
Abort = true;
|
||||
mergeSubRangeInto(LHS, RHS, Mask, CP);
|
||||
} else {
|
||||
// Pair up subranges and merge.
|
||||
for (LiveInterval::SubRange &R : RHS.subranges()) {
|
||||
LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
|
||||
if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
|
||||
Abort = true;
|
||||
break;
|
||||
}
|
||||
mergeSubRangeInto(LHS, R, Mask, CP);
|
||||
}
|
||||
}
|
||||
if (Abort) {
|
||||
// This shouldn't have happened :-(
|
||||
// However we are aware of at least one existing problem where we
|
||||
// can't merge subranges when multiple ranges end up in the
|
||||
// "overflow bit" 32. As a workaround we drop all subregister ranges
|
||||
// which means we loose some precision but are back to a well defined
|
||||
// state.
|
||||
assert(TargetRegisterInfo::isImpreciseLaneMask(
|
||||
CP.getNewRC()->getLaneMask())
|
||||
&& "SubRange merge should only fail when merging into bit 32.");
|
||||
DEBUG(dbgs() << "\tSubrange join aborted!\n");
|
||||
LHS.clearSubRanges();
|
||||
RHS.clearSubRanges();
|
||||
} else {
|
||||
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
|
||||
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
|
||||
|
||||
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
|
||||
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
|
||||
}
|
||||
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
|
||||
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
|
||||
}
|
||||
|
||||
// The merging algorithm in LiveInterval::join() can't handle conflicting
|
||||
|
@ -400,14 +400,6 @@ void VirtRegRewriter::rewrite() {
|
||||
MO.setIsUndef(true);
|
||||
} else if (!MO.isDead()) {
|
||||
assert(MO.isDef());
|
||||
// Things get tricky when we ran out of lane mask bits and
|
||||
// merged multiple lanes into the overflow bit: In this case
|
||||
// our subregister liveness tracking isn't precise and we can't
|
||||
// know what subregister parts are undefined, fall back to the
|
||||
// implicit super-register def then.
|
||||
LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
|
||||
if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask))
|
||||
SuperDefs.push_back(PhysReg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1171,20 +1171,13 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
|
||||
CoveringLanes = ~0u;
|
||||
for (auto &Idx : SubRegIndices) {
|
||||
if (Idx.getComposites().empty()) {
|
||||
if (Bit > 32) {
|
||||
PrintFatalError(
|
||||
Twine("Ran out of lanemask bits to represent subregister ")
|
||||
+ Idx.getName());
|
||||
}
|
||||
Idx.LaneMask = 1u << Bit;
|
||||
// Share bit 31 in the unlikely case there are more than 32 leafs.
|
||||
//
|
||||
// Sharing bits is harmless; it allows graceful degradation in targets
|
||||
// with more than 32 vector lanes. They simply get a limited resolution
|
||||
// view of lanes beyond the 32nd.
|
||||
//
|
||||
// See also the comment for getSubRegIndexLaneMask().
|
||||
if (Bit < 31)
|
||||
++Bit;
|
||||
else
|
||||
// Once bit 31 is shared among multiple leafs, the 'lane' it represents
|
||||
// is no longer covering its registers.
|
||||
CoveringLanes &= ~(1u << Bit);
|
||||
++Bit;
|
||||
} else {
|
||||
Idx.LaneMask = 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user