1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[GISel][KnownBits] Add a cache mechanism to speed up compile time

This patch adds a cache that is valid only for the duration of a call
to getKnownBits. With such short lived cache we avoid all the problems
of cache invalidation while still getting the benefits of reusing
the information we already computed.

This cache is useful whenever an instruction occurs more than once
in a chain of computation.
E.g.,
v0 = G_ADD v1, v2
v3 = G_ADD v0, v1

Previously we would compute the known bits for:
v1, v2, v0, then v1 again and finally v3.

With the patch, now we won't have to recompute v1 again.

NFC
This commit is contained in:
Quentin Colombet 2020-02-20 17:51:26 -08:00
parent a2f76095a2
commit e023c34590
2 changed files with 37 additions and 8 deletions

View File

@ -13,6 +13,7 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H #ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H #define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/Register.h"
@ -32,6 +33,8 @@ class GISelKnownBits : public GISelChangeObserver {
const TargetLowering &TL; const TargetLowering &TL;
const DataLayout &DL; const DataLayout &DL;
unsigned MaxDepth; unsigned MaxDepth;
/// Cache maintained during a computeKnownBits request.
SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;
public: public:
GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6); GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);

View File

@ -69,7 +69,10 @@ KnownBits GISelKnownBits::getKnownBits(Register R) {
LLT Ty = MRI.getType(R); LLT Ty = MRI.getType(R);
APInt DemandedElts = APInt DemandedElts =
Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
// For now, we only maintain the cache during one request.
assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
computeKnownBitsImpl(R, Known, DemandedElts); computeKnownBitsImpl(R, Known, DemandedElts);
ComputeKnownBitsCache.clear();
return Known; return Known;
} }
@ -85,6 +88,17 @@ APInt GISelKnownBits::getKnownZeroes(Register R) {
APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
/// Debug helper: print \p MI together with the bits of its result that are
/// known to be zero or one after \p Depth levels of recursion in the
/// known-bits analysis. Hex strings are produced via APInt::toString(16).
static void dumpResult(const MachineInstr &MI, const KnownBits &Known,
                       unsigned Depth) {
  // One line per piece of information, each tagged with the recursion depth.
  dbgs() << "[" << Depth << "] Compute known bits: " << MI;
  dbgs() << "[" << Depth << "] Computed for: " << MI;
  // Bits determined either way (union of the zero and one sets).
  dbgs() << "[" << Depth << "] Known: 0x"
         << (Known.Zero | Known.One).toString(16, false) << "\n";
  dbgs() << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false)
         << "\n";
  dbgs() << "[" << Depth << "] One: 0x" << Known.One.toString(16, false)
         << "\n";
}
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts, const APInt &DemandedElts,
unsigned Depth) { unsigned Depth) {
@ -102,6 +116,14 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
} }
unsigned BitWidth = DstTy.getSizeInBits(); unsigned BitWidth = DstTy.getSizeInBits();
auto CacheEntry = ComputeKnownBitsCache.find(R);
if (CacheEntry != ComputeKnownBitsCache.end()) {
Known = CacheEntry->second;
LLVM_DEBUG(dbgs() << "Cache hit at ");
LLVM_DEBUG(dumpResult(MI, Known, Depth));
assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
return;
}
Known = KnownBits(BitWidth); // Don't know anything Known = KnownBits(BitWidth); // Don't know anything
if (DstTy.isVector()) if (DstTy.isVector())
@ -137,6 +159,14 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// point of the pipeline, otherwise the main live-range will be // point of the pipeline, otherwise the main live-range will be
// defined more than once, which is against SSA. // defined more than once, which is against SSA.
assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
// Record in the cache that we know nothing for MI.
// This will get updated later and in the meantime, if we reach that
// phi again, because of a loop, we will cut the search thanks to this
// cache entry. When this happens this cache entry is actually accurate,
// thus we are not losing anything by doing that, because right now,
// the main analysis will reach the maximum depth without being able
// to fully analyze the phi.
ComputeKnownBitsCache[R] = KnownBits(BitWidth);
// PHI's operand are a mix of registers and basic blocks interleaved. // PHI's operand are a mix of registers and basic blocks interleaved.
// We only care about the register ones. // We only care about the register ones.
for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
@ -374,14 +404,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
} }
assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(!Known.hasConflict() && "Bits known to be one AND zero?");
LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" LLVM_DEBUG(dumpResult(MI, Known, Depth));
<< Depth << "] Computed for: " << MI << "[" << Depth
<< "] Known: 0x" // Update the cache.
<< (Known.Zero | Known.One).toString(16, false) << "\n" ComputeKnownBitsCache[R] = Known;
<< "[" << Depth << "] Zero: 0x"
<< Known.Zero.toString(16, false) << "\n"
<< "[" << Depth << "] One: 0x"
<< Known.One.toString(16, false) << "\n");
} }
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned GISelKnownBits::computeNumSignBits(Register R,