1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[Support] Make line-number cache robust against access patterns.

Summary:
The LLVM SourceMgr class (which is used indirectly by Swift, though not Clang)
has a routine for looking up line numbers of SMLocs. This routine uses a
shared, special-purpose cache that handles exactly one access pattern
efficiently: looking up the line number of an SMLoc that points into the same
buffer as the last query made to the SourceMgr, at a location in the buffer at
or ahead of the last query.

When this works it's fine, but when it fails it's catastrophic for performance:
one recent out-of-order access from a Swift utility routine ran for tens of
seconds, spending 99% of its time repeatedly scanning buffers for '\n'.

This change removes the shared cache from the SourceMgr and installs a new
cache in each SrcBuffer. The per-SrcBuffer caches are also "full", in the sense
that rather than caching a single last-query pointer, they cache _all_ the
line-ending offsets, in a binary-searchable array, such that once it's
populated (on first access), all subsequent access patterns run at the same
speed.

Performance measurements I've done show this is actually a little bit faster on
real codebases (though only a couple fractions of a percent). Memory usage is
up by a few tens to hundreds of bytes per SrcBuffer that has a line lookup done
on it; I've attempted to minimize this by dynamically selecting the integer
size used when storing offset arrays. But the main motive here is to
make impossible the cases we don't always see, which show up by surprise when
there is an out-of-order access pattern.

Reviewers: jordan_rose

Reviewed By: jordan_rose

Subscribers: probinson, llvm-commits

Differential Revision: https://reviews.llvm.org/D45003

llvm-svn: 329470
This commit is contained in:
Graydon Hoare 2018-04-07 00:44:02 +00:00
parent d0c91cf6bf
commit cded1d3751
4 changed files with 421 additions and 77 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@ -57,8 +58,38 @@ private:
/// The memory buffer for the file.
std::unique_ptr<MemoryBuffer> Buffer;
/// Helper type for OffsetCache below: since we're storing many offsets
/// into relatively small files (often smaller than 2^8 or 2^16 bytes),
/// we select the offset vector element type dynamically based on the
/// size of Buffer.
using VariableSizeOffsets = PointerUnion4<std::vector<uint8_t> *,
std::vector<uint16_t> *,
std::vector<uint32_t> *,
std::vector<uint64_t> *>;
/// Vector of offsets into Buffer at which there are line-endings
/// (lazily populated). Once populated, the '\n' that marks the end of
/// line number N from [1..] is at Buffer[OffsetCache[N-1]]. Since
/// these offsets are in sorted (ascending) order, they can be
/// binary-searched for the first one after any given offset (eg. an
/// offset corresponding to a particular SMLoc).
mutable VariableSizeOffsets OffsetCache;
/// Populate \c OffsetCache and look up a given \p Ptr in it, assuming
/// it points somewhere into \c Buffer. The static type parameter \p T
/// must be an unsigned integer type from uint{8,16,32,64}_t large
/// enough to store offsets inside \c Buffer.
template<typename T>
unsigned getLineNumber(const char *Ptr) const;
/// This is the location of the parent include, or null if at the top level.
SMLoc IncludeLoc;
SrcBuffer() = default;
SrcBuffer(SrcBuffer &&);
SrcBuffer(const SrcBuffer &) = delete;
SrcBuffer &operator=(const SrcBuffer &) = delete;
~SrcBuffer();
};
/// This is all of the buffers that we are reading from.
@ -67,10 +98,6 @@ private:
// This is the list of directories we should search for include files in.
std::vector<std::string> IncludeDirectories;
/// This is a cache for line number queries, its implementation is really
/// private to SourceMgr.cpp.
mutable void *LineNoCache = nullptr;
DiagHandlerTy DiagHandler = nullptr;
void *DiagContext = nullptr;
@ -80,7 +107,7 @@ public:
SourceMgr() = default;
SourceMgr(const SourceMgr &) = delete;
SourceMgr &operator=(const SourceMgr &) = delete;
~SourceMgr();
~SourceMgr() = default;
void setIncludeDirs(const std::vector<std::string> &Dirs) {
IncludeDirectories = Dirs;

View File

@ -168,14 +168,6 @@ private:
/// \brief List of forward directional labels for diagnosis at the end.
SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
/// When generating dwarf for assembly source files we need to calculate the
/// logical line number based on the last parsed cpp hash file line comment
/// and current line. Since this is slow and messes up the SourceMgr's
/// cache we save the last info we queried with SrcMgr.FindLineNumber().
SMLoc LastQueryIDLoc;
unsigned LastQueryBuffer;
unsigned LastQueryLine;
/// AssemblerDialect. ~0U means unset value and use value provided by MAI.
unsigned AssemblerDialect = ~0U;
@ -2189,20 +2181,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
0, StringRef(), CppHashInfo.Filename);
getContext().setGenDwarfFileNumber(FileNumber);
// Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's
// cache with the different Loc from the call above we save the last
// info we queried here with SrcMgr.FindLineNumber().
unsigned CppHashLocLineNo;
if (LastQueryIDLoc == CppHashInfo.Loc &&
LastQueryBuffer == CppHashInfo.Buf)
CppHashLocLineNo = LastQueryLine;
else {
CppHashLocLineNo =
SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
LastQueryLine = CppHashLocLineNo;
LastQueryIDLoc = CppHashInfo.Loc;
LastQueryBuffer = CppHashInfo.Buf;
}
unsigned CppHashLocLineNo =
SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
}

View File

@ -28,6 +28,7 @@
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <limits>
#include <memory>
#include <string>
#include <utility>
@ -36,24 +37,6 @@ using namespace llvm;
static const size_t TabStop = 8;
namespace {
/// Single-entry cache describing the most recent line-number query.
struct LineNoCacheTy {
/// Pointer into the buffer at which the last query ended.
const char *LastQuery;
/// ID of the buffer the last query was made against.
unsigned LastQueryBufferID;
/// Line number that was computed for \c LastQuery.
unsigned LineNoOfQuery;
};
} // end anonymous namespace
/// Recover the typed line-number cache from the opaque \p Ptr in which it is
/// stored. Returns null when \p Ptr is null.
static LineNoCacheTy *getCache(void *Ptr) {
  // A void* -> T* conversion is exactly what static_cast is for; using the
  // named cast keeps the conversion greppable and cannot silently drop const.
  return static_cast<LineNoCacheTy *>(Ptr);
}
/// Releases the line-number cache referenced by \c LineNoCache. When that
/// pointer is null the delete expression is a no-op.
SourceMgr::~SourceMgr() {
delete getCache(LineNoCache);
}
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc,
std::string &IncludedFile) {
@ -85,46 +68,86 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
return 0;
}
template <typename T>
unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
// Ensure OffsetCache is allocated and populated with offsets of all the
// '\n' bytes.
std::vector<T> *Offsets = nullptr;
if (OffsetCache.isNull()) {
Offsets = new std::vector<T>();
OffsetCache = Offsets;
size_t Sz = Buffer->getBufferSize();
assert(Sz <= std::numeric_limits<T>::max());
StringRef S = Buffer->getBuffer();
for (size_t N = 0; N < Sz; ++N) {
if (S[N] == '\n') {
Offsets->push_back(static_cast<T>(N));
}
}
} else {
Offsets = OffsetCache.get<std::vector<T> *>();
}
const char *BufStart = Buffer->getBufferStart();
assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
ptrdiff_t PtrDiff = Ptr - BufStart;
assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
T PtrOffset = static_cast<T>(PtrDiff);
// std::lower_bound returns the first EOL offset that's not-less-than
// PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
// (including if PtrOffset refers to the EOL itself). If there's no such
// EOL, returns end().
auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
// Lines count from 1, so add 1 to the distance from the 0th line.
return (1 + (EOL - Offsets->begin()));
}
/// Move constructor: takes over \p Other's memory buffer and its line-offset
/// cache pointer, and copies its include location.
SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
: Buffer(std::move(Other.Buffer)),
OffsetCache(Other.OffsetCache),
IncludeLoc(Other.IncludeLoc) {
// Null out the source's cache pointer so that only this object's destructor
// deletes the offset vector.
Other.OffsetCache = nullptr;
}
/// Frees the line-offset vector held in \c OffsetCache, if there is one.
SourceMgr::SrcBuffer::~SrcBuffer() {
  // Nothing to free: the cache was never populated, or it was handed over to
  // another SrcBuffer by the move constructor.
  if (OffsetCache.isNull())
    return;

  // The union only stores a pointer, so the vector must be deleted through
  // the element type it was actually created with.
  if (OffsetCache.is<std::vector<uint8_t> *>()) {
    delete OffsetCache.get<std::vector<uint8_t> *>();
  } else if (OffsetCache.is<std::vector<uint16_t> *>()) {
    delete OffsetCache.get<std::vector<uint16_t> *>();
  } else if (OffsetCache.is<std::vector<uint32_t> *>()) {
    delete OffsetCache.get<std::vector<uint32_t> *>();
  } else {
    delete OffsetCache.get<std::vector<uint64_t> *>();
  }

  OffsetCache = nullptr;
}
std::pair<unsigned, unsigned>
SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
if (!BufferID)
BufferID = FindBufferContainingLoc(Loc);
assert(BufferID && "Invalid Location!");
const MemoryBuffer *Buff = getMemoryBuffer(BufferID);
auto &SB = getBufferInfo(BufferID);
const char *Ptr = Loc.getPointer();
// Count the number of \n's between the start of the file and the specified
// location.
unsigned LineNo = 1;
size_t Sz = SB.Buffer->getBufferSize();
assert(Sz <= std::numeric_limits<uint64_t>::max());
unsigned LineNo;
if (Sz <= std::numeric_limits<uint8_t>::max())
LineNo = SB.getLineNumber<uint8_t>(Ptr);
else if (Sz <= std::numeric_limits<uint16_t>::max())
LineNo = SB.getLineNumber<uint16_t>(Ptr);
else if (Sz <= std::numeric_limits<uint32_t>::max())
LineNo = SB.getLineNumber<uint32_t>(Ptr);
else
LineNo = SB.getLineNumber<uint64_t>(Ptr);
const char *BufStart = Buff->getBufferStart();
const char *Ptr = BufStart;
// If we have a line number cache, and if the query is to a later point in the
// same file, start searching from the last query location. This optimizes
// for the case when multiple diagnostics come out of one file in order.
if (LineNoCacheTy *Cache = getCache(LineNoCache))
if (Cache->LastQueryBufferID == BufferID &&
Cache->LastQuery <= Loc.getPointer()) {
Ptr = Cache->LastQuery;
LineNo = Cache->LineNoOfQuery;
}
// Scan for the location being queried, keeping track of the number of lines
// we see.
for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
if (*Ptr == '\n') ++LineNo;
// Allocate the line number cache if it doesn't exist.
if (!LineNoCache)
LineNoCache = new LineNoCacheTy();
// Update the line # cache.
LineNoCacheTy &Cache = *getCache(LineNoCache);
Cache.LastQueryBufferID = BufferID;
Cache.LastQuery = Ptr;
Cache.LineNoOfQuery = LineNo;
const char *BufStart = SB.Buffer->getBufferStart();
size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);

View File

@ -107,6 +107,320 @@ TEST_F(SourceMgrTest, LocationAtNewline) {
Output);
}
// An empty buffer: the diagnostic at getLoc(0) is expected to be reported at
// line 1, column 1, followed by an empty source line and a caret.
TEST_F(SourceMgrTest, LocationAtEmptyBuffer) {
setMainBuffer("", "file.in");
printMessage(getLoc(0), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:1:1: error: message\n"
"\n"
"^\n",
Output);
}
// Buffer is a single "\n": getLoc(0), the newline itself, is expected to be
// reported on line 1, column 1.
TEST_F(SourceMgrTest, LocationJustOnSoleNewline) {
setMainBuffer("\n", "file.in");
printMessage(getLoc(0), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:1:1: error: message\n"
"\n"
"^\n",
Output);
}
// Buffer is a single "\n": getLoc(1), just past the newline, is expected to
// be reported on line 2, column 1.
TEST_F(SourceMgrTest, LocationJustAfterSoleNewline) {
setMainBuffer("\n", "file.in");
printMessage(getLoc(1), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:2:1: error: message\n"
"\n"
"^\n",
Output);
}
// Buffer "123" has no newline at all: getLoc(3), just past the last
// character, is expected to be reported on line 1, column 4.
TEST_F(SourceMgrTest, LocationJustAfterNonNewline) {
setMainBuffer("123", "file.in");
printMessage(getLoc(3), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:1:4: error: message\n"
"123\n"
" ^\n",
Output);
}
// Two-line buffer: getLoc(3), inside the first line, is expected to be
// reported on line 1, column 4.
TEST_F(SourceMgrTest, LocationOnFirstLineOfMultiline) {
setMainBuffer("1234\n6789\n", "file.in");
printMessage(getLoc(3), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:1:4: error: message\n"
"1234\n"
" ^\n",
Output);
}
// Two-line buffer: getLoc(4), the '\n' ending the first line, is expected to
// still be attributed to line 1 (column 5).
TEST_F(SourceMgrTest, LocationOnEOLOfFirstLineOfMultiline) {
setMainBuffer("1234\n6789\n", "file.in");
printMessage(getLoc(4), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:1:5: error: message\n"
"1234\n"
" ^\n",
Output);
}
// Two-line buffer: getLoc(5), the first character after the first '\n', is
// expected to be reported on line 2, column 1.
TEST_F(SourceMgrTest, LocationOnSecondLineOfMultiline) {
setMainBuffer("1234\n6789\n", "file.in");
printMessage(getLoc(5), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:2:1: error: message\n"
"6789\n"
"^\n",
Output);
}
// Same as the previous case but the second line has no terminating '\n':
// getLoc(5) is still expected to be reported on line 2, column 1.
TEST_F(SourceMgrTest, LocationOnSecondLineOfMultilineNoSecondEOL) {
setMainBuffer("1234\n6789", "file.in");
printMessage(getLoc(5), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:2:1: error: message\n"
"6789\n"
"^\n",
Output);
}
// Two-line buffer: getLoc(9), the '\n' ending the second line, is expected
// to be attributed to line 2 (column 5).
TEST_F(SourceMgrTest, LocationOnEOLOfSecondSecondLineOfMultiline) {
setMainBuffer("1234\n6789\n", "file.in");
printMessage(getLoc(9), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:2:5: error: message\n"
"6789\n"
" ^\n",
Output);
}
// 23 copies of "1234567890\n" (11 bytes each): 253 bytes forming 23 complete
// lines. The tests below append 2, 3 or 4 more bytes to build buffers of 255,
// 256 and 257 bytes.
#define STRING_LITERAL_253_BYTES \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n1234567890\n" \
"1234567890\n"
//===----------------------------------------------------------------------===//
// 255-byte buffer tests
//===----------------------------------------------------------------------===//
// 255-byte buffer without a trailing newline: getLoc(253), the first byte of
// the final line "12", is expected at line 24, column 1.
TEST_F(SourceMgrTest, LocationBeforeEndOf255ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(253), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:1: error: message\n"
"12\n"
"^\n",
Output);
}
// 255-byte buffer without a trailing newline: getLoc(254), the last byte of
// the buffer, is expected at line 24, column 2.
TEST_F(SourceMgrTest, LocationAtEndOf255ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:2: error: message\n"
"12\n"
" ^\n",
Output);
}
// 255-byte buffer without a trailing newline: getLoc(255), one past the last
// byte, is expected at line 24, column 3.
TEST_F(SourceMgrTest, LocationPastEndOf255ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:3: error: message\n"
"12\n"
" ^\n",
Output);
}
// 255-byte buffer ending in '\n': getLoc(253), the first byte of the final
// line "1\n", is expected at line 24, column 1.
TEST_F(SourceMgrTest, LocationBeforeEndOf255ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1\n" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(253), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:1: error: message\n"
"1\n"
"^\n",
Output);
}
// 255-byte buffer ending in '\n': getLoc(254), the final '\n' itself, is
// expected to still be on line 24 (column 2).
TEST_F(SourceMgrTest, LocationAtEndOf255ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1\n" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:2: error: message\n"
"1\n"
" ^\n",
Output);
}
// 255-byte buffer ending in '\n': getLoc(255), one past the final '\n', is
// expected on a new, empty line 25 at column 1.
TEST_F(SourceMgrTest, LocationPastEndOf255ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1\n" // + 2 = 255 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:25:1: error: message\n"
"\n"
"^\n",
Output);
}
//===----------------------------------------------------------------------===//
// 256-byte buffer tests
//===----------------------------------------------------------------------===//
// 256-byte buffer without a trailing newline: getLoc(254), the second byte
// of the final line "123", is expected at line 24, column 2.
TEST_F(SourceMgrTest, LocationBeforeEndOf256ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:2: error: message\n"
"123\n"
" ^\n",
Output);
}
// 256-byte buffer without a trailing newline: getLoc(255), the last byte of
// the buffer, is expected at line 24, column 3.
TEST_F(SourceMgrTest, LocationAtEndOf256ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:3: error: message\n"
"123\n"
" ^\n",
Output);
}
// 256-byte buffer without a trailing newline: getLoc(256), one past the last
// byte, is expected at line 24, column 4.
TEST_F(SourceMgrTest, LocationPastEndOf256ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:4: error: message\n"
"123\n"
" ^\n",
Output);
}
// 256-byte buffer ending in '\n': getLoc(254), the second byte of the final
// line "12\n", is expected at line 24, column 2.
TEST_F(SourceMgrTest, LocationBeforeEndOf256ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12\n" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:2: error: message\n"
"12\n"
" ^\n",
Output);
}
// 256-byte buffer ending in '\n': getLoc(255), the final '\n' itself, is
// expected to still be on line 24 (column 3).
TEST_F(SourceMgrTest, LocationAtEndOf256ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12\n" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:3: error: message\n"
"12\n"
" ^\n",
Output);
}
// 256-byte buffer ending in '\n': getLoc(256), one past the final '\n', is
// expected on a new, empty line 25 at column 1.
TEST_F(SourceMgrTest, LocationPastEndOf256ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"12\n" // + 3 = 256 bytes
, "file.in");
printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:25:1: error: message\n"
"\n"
"^\n",
Output);
}
//===----------------------------------------------------------------------===//
// 257-byte buffer tests
//===----------------------------------------------------------------------===//
// 257-byte buffer without a trailing newline: getLoc(255), the third byte of
// the final line "1234", is expected at line 24, column 3.
TEST_F(SourceMgrTest, LocationBeforeEndOf257ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1234" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:3: error: message\n"
"1234\n"
" ^\n",
Output);
}
// 257-byte buffer without a trailing newline: getLoc(256), the last byte of
// the buffer, is expected at line 24, column 4.
TEST_F(SourceMgrTest, LocationAtEndOf257ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1234" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:4: error: message\n"
"1234\n"
" ^\n",
Output);
}
// 257-byte buffer without a trailing newline: getLoc(257), one past the last
// byte, is expected at line 24, column 5.
TEST_F(SourceMgrTest, LocationPastEndOf257ByteBuffer) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"1234" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(257), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:5: error: message\n"
"1234\n"
" ^\n",
Output);
}
// 257-byte buffer ending in '\n': getLoc(255), the third byte of the final
// line "123\n", is expected at line 24, column 3.
TEST_F(SourceMgrTest, LocationBeforeEndOf257ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123\n" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:3: error: message\n"
"123\n"
" ^\n",
Output);
}
// 257-byte buffer ending in '\n': getLoc(256), the final '\n' itself, is
// expected to still be on line 24 (column 4).
TEST_F(SourceMgrTest, LocationAtEndOf257ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123\n" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:24:4: error: message\n"
"123\n"
" ^\n",
Output);
}
// 257-byte buffer ending in '\n': getLoc(257), one past the final '\n', is
// expected on a new, empty line 25 at column 1.
TEST_F(SourceMgrTest, LocationPastEndOf257ByteBufferEndingInNewline) {
setMainBuffer(STRING_LITERAL_253_BYTES // first 253 bytes
"123\n" // + 4 = 257 bytes
, "file.in");
printMessage(getLoc(257), SourceMgr::DK_Error, "message", None, None);
EXPECT_EQ("file.in:25:1: error: message\n"
"\n"
"^\n",
Output);
}
TEST_F(SourceMgrTest, BasicRange) {
setMainBuffer("aaa bbb\nccc ddd\n", "file.in");
printMessage(getLoc(4), SourceMgr::DK_Error, "message", getRange(4, 3), None);