1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

Bitcode: Collect all MDString records into a single blob

Optimize output of MDStrings in bitcode.  This emits them in big blocks
(currently 1024) in a pair of records:
  - BULK_STRING_SIZES: the sizes of the strings in the block, and
  - BULK_STRING_DATA: a single blob, which is the concatenation of all
    the strings.

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.

I needed to add support for blobs to streaming input to get the test
suite passing.
  - StreamingMemoryObject::getPointer reads ahead and returns the
    address of the blob.
  - To avoid a possible reallocation of StreamingMemoryObject::Bytes,
    BitstreamCursor::readRecord needs to move the call to JumpToEnd
    forward so that getPointer is the last bitstream operation.

llvm-svn: 264409
This commit is contained in:
Duncan P. N. Exon Smith 2016-03-25 14:40:18 +00:00
parent bf29b07c44
commit 7b1e6b8367
11 changed files with 142 additions and 50 deletions

View File

@ -209,7 +209,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
};
enum MetadataCodes {
METADATA_STRING = 1, // MDSTRING: [values]
METADATA_STRING_OLD = 1, // MDSTRING: [values]
METADATA_VALUE = 2, // VALUE: [type num, value num]
METADATA_NODE = 3, // NODE: [n x md num]
METADATA_NAME = 4, // STRING: [values]
@ -243,6 +243,8 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
METADATA_MODULE = 32, // [distinct, scope, name, ...]
METADATA_MACRO = 33, // [distinct, macinfo, line, name, value]
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
METADATA_BULK_STRING_SIZES = 35, // [m x (size num)]
METADATA_BULK_STRING_DATA = 36, // [blob]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each

View File

@ -28,15 +28,7 @@ public:
uint64_t getExtent() const override;
uint64_t readBytes(uint8_t *Buf, uint64_t Size,
uint64_t Address) const override;
const uint8_t *getPointer(uint64_t address, uint64_t size) const override {
// FIXME: This could be fixed by ensuring the bytes are fetched and
// making a copy, requiring that the bitcode size be known, or
// otherwise ensuring that the memory doesn't go away/get reallocated,
// but it's not currently necessary. Users that need the pointer (any
// that need Blobs) don't stream.
report_fatal_error("getPointer in streaming memory objects not allowed");
return nullptr;
}
const uint8_t *getPointer(uint64_t Address, uint64_t Size) const override;
bool isValidAddress(uint64_t address) const override;
/// Drop s bytes from the front of the stream, pushing the positions of the

View File

@ -2363,7 +2363,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
NextMetadataNo++);
break;
}
case bitc::METADATA_STRING: {
case bitc::METADATA_STRING_OLD: {
std::string String(Record.begin(), Record.end());
// Test for upgrading !llvm.loop.
@ -2373,6 +2373,38 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
case bitc::METADATA_BULK_STRING_SIZES: {
// This is a pair of records for an MDString block: SIZES, which is a
// list of string lengths; and DATA, which is a blob with all the strings
// concatenated together.
//
// Note: since this record type was introduced after the upgrade for
// !llvm.loop, we don't need to change HasSeenOldLoopTags.
if (Record.empty())
return error("Invalid record: missing bulk metadata string sizes");
StringRef Blob;
SmallVector<uint64_t, 1> BlobRecord;
Code = Stream.ReadCode();
unsigned BlobCode = Stream.readRecord(Code, BlobRecord, &Blob);
if (BlobCode != bitc::METADATA_BULK_STRING_DATA)
return error("Invalid record: missing bulk metadata string data");
if (!BlobRecord.empty())
return error("Invalid record: unexpected bulk metadata arguments");
for (uint64_t Size : Record) {
if (Blob.size() < Size)
return error("Invalid record: not enough bulk metadata string bytes");
// Extract the current string.
MetadataList.assignValue(MDString::get(Context, Blob.slice(0, Size)),
NextMetadataNo++);
Blob = Blob.drop_front(Size);
}
if (!Blob.empty())
return error("Invalid record: too many bulk metadata string bytes");
break;
}
case bitc::METADATA_KIND: {
// Support older bitcode files that had METADATA_KIND records in a
// block with METADATA_BLOCK_ID.

View File

@ -261,6 +261,10 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
}
// Otherwise, inform the streamer that we need these bytes in memory.
// Skip over tail padding first. We can't do it later if this is a
// streaming memory object, since that could reallocate the storage that
// the blob pointer references.
JumpToBit(NewEnd);
const char *Ptr = (const char*)
BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
@ -272,8 +276,6 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
for (; NumElts; --NumElts)
Vals.push_back((unsigned char)*Ptr++);
}
// Skip over tail padding.
JumpToBit(NewEnd);
}
return Code;

View File

@ -1347,31 +1347,78 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE,
}
}
static unsigned createMDStringDataAbbrev(BitstreamWriter &Stream) {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_BULK_STRING_DATA));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
return Stream.EmitAbbrev(Abbv);
}
static void emitMDStringBlob(unsigned DataAbbrev,
ArrayRef<const Metadata *> Strings,
BitstreamWriter &Stream,
SmallVectorImpl<uint64_t> &Record,
SmallString<4096> &Blob) {
for (const Metadata *MD : Strings) {
StringRef S = cast<MDString>(MD)->getString();
Record.push_back(S.size());
Blob.append(S.begin(), S.end());
}
Stream.EmitRecord(bitc::METADATA_BULK_STRING_SIZES, Record);
Record.clear();
Record.push_back(bitc::METADATA_BULK_STRING_DATA);
Stream.EmitRecordWithBlob(DataAbbrev, Record, Blob);
Record.clear();
}
/// Write out a section of records for MDString.
///
/// All the MDString elements in a metadata block are emitted in bulk. They're
/// grouped into blocks, and each block is emitted with pair of records:
///
/// - SIZES: a list of the sizes of the strings in the block.
/// - DATA: the blob itself.
static void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
BitstreamWriter &Stream,
SmallVectorImpl<uint64_t> &Record) {
if (Strings.empty())
return;
// Emit strings in large blocks to reduce record overhead. Somewhat
// arbitrarily, limit this to 512 strings per blob:
// - big enough to eliminate overhead;
// - small enough that the reader's SIZES record will stay within a page.
const size_t NumStringsPerBlob = 512;
Record.reserve(std::min(NumStringsPerBlob, Strings.size()));
unsigned DataAbbrev = createMDStringDataAbbrev(Stream);
SmallString<4096> Blob;
while (Strings.size() > NumStringsPerBlob) {
emitMDStringBlob(DataAbbrev, Strings.slice(0, NumStringsPerBlob), Stream,
Record, Blob);
Strings = Strings.slice(NumStringsPerBlob);
}
if (!Strings.empty())
emitMDStringBlob(DataAbbrev, Strings, Stream, Record, Blob);
}
static void WriteModuleMetadata(const Module &M,
const ValueEnumerator &VE,
BitstreamWriter &Stream) {
const auto &MDs = VE.getMDs();
if (MDs.empty() && M.named_metadata_empty())
if (VE.getMDs().empty() && M.named_metadata_empty())
return;
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
unsigned MDSAbbrev = 0;
if (VE.hasMDString()) {
// Abbrev for METADATA_STRING.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
MDSAbbrev = Stream.EmitAbbrev(Abbv);
}
// Initialize MDNode abbreviations.
#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
#include "llvm/IR/Metadata.def"
SmallVector<uint64_t, 64> Record;
for (const Metadata *MD : MDs) {
writeMetadataStrings(VE.getMDStrings(), Stream, Record);
for (const Metadata *MD : VE.getNonMDStrings()) {
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
assert(N->isResolved() && "Expected forward references to be resolved");
@ -1385,17 +1432,7 @@ static void WriteModuleMetadata(const Module &M,
#include "llvm/IR/Metadata.def"
}
}
if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) {
WriteValueAsMetadata(MDC, VE, Stream, Record);
continue;
}
const MDString *MDS = cast<MDString>(MD);
// Code: [strchar x N]
Record.append(MDS->bytes_begin(), MDS->bytes_end());
// Emit the finished record.
Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
Record.clear();
WriteValueAsMetadata(cast<ConstantAsMetadata>(MD), VE, Stream, Record);
}
writeNamedMetadata(M, VE, Stream, Record);

View File

@ -280,8 +280,7 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
ValueEnumerator::ValueEnumerator(const Module &M,
bool ShouldPreserveUseListOrder)
: HasMDString(false),
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
: ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
if (ShouldPreserveUseListOrder)
UseListOrders = predictUseListOrder(M);
@ -375,6 +374,9 @@ ValueEnumerator::ValueEnumerator(const Module &M,
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
// Organize metadata ordering.
organizeMetadata();
}
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
@ -530,8 +532,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
EnumerateMDNodeOperands(N);
else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
EnumerateValue(C->getValue());
HasMDString |= isa<MDString>(MD);
else
++NumMDStrings;
// Replace the dummy ID inserted above with the correct one. MetadataMap may
// have changed by inserting operands, so we need a fresh lookup here.
@ -557,6 +559,19 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(
FunctionLocalMDs.push_back(Local);
}
void ValueEnumerator::organizeMetadata() {
if (!NumMDStrings)
return;
// Put the strings first.
std::stable_partition(MDs.begin(), MDs.end(),
[](const Metadata *MD) { return isa<MDString>(MD); });
// Renumber.
for (unsigned I = 0, E = MDs.size(); I != E; ++I)
MetadataMap[MDs[I]] = I + 1;
}
void ValueEnumerator::EnumerateValue(const Value *V) {
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");

View File

@ -66,7 +66,7 @@ private:
SmallVector<const LocalAsMetadata *, 8> FunctionLocalMDs;
typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
MetadataMapType MetadataMap;
bool HasMDString;
unsigned NumMDStrings = 0;
bool ShouldPreserveUseListOrder;
typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
@ -121,8 +121,6 @@ public:
}
unsigned numMDs() const { return MDs.size(); }
bool hasMDString() const { return HasMDString; }
bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
unsigned getTypeID(Type *T) const {
@ -157,9 +155,16 @@ public:
const ValueList &getValues() const { return Values; }
const std::vector<const Metadata *> &getMDs() const { return MDs; }
ArrayRef<const Metadata *> getMDStrings() const {
return makeArrayRef(MDs).slice(0, NumMDStrings);
}
ArrayRef<const Metadata *> getNonMDStrings() const {
return makeArrayRef(MDs).slice(NumMDStrings);
}
const SmallVectorImpl<const LocalAsMetadata *> &getFunctionLocalMDs() const {
return FunctionLocalMDs;
}
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
@ -189,6 +194,10 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
// Reorder the reachable metadata. This is not just an optimization, but is
// mandatory for emitting MDString correctly.
void organizeMetadata();
void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Metadata *MD);
void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local);

View File

@ -104,6 +104,12 @@ uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
return Size;
}
const uint8_t *StreamingMemoryObject::getPointer(uint64_t Address,
uint64_t Size) const {
fetchToPos(Address + Size - 1);
return &Bytes[Address + BytesSkipped];
}
bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
if (BytesRead < s) return true;
BytesSkipped = s;

View File

@ -168,11 +168,6 @@ RUN: FileCheck --check-prefix=INVALID-ARGUMENT-TYPE %s
INVALID-ARGUMENT-TYPE: Invalid function argument type
RUN: not llvm-dis -disable-output %p/Inputs/invalid-fixme-streaming-blob.bc 2>&1 | \
RUN: FileCheck --check-prefix=STREAMING-BLOB %s
STREAMING-BLOB: getPointer in streaming memory objects not allowed
RUN: not llvm-dis -disable-output %p/Inputs/invalid-function-comdat-id.bc 2>&1 | \
RUN: FileCheck --check-prefix=INVALID-FCOMDAT-ID %s

View File

@ -312,7 +312,9 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::METADATA_BLOCK_ID:
switch(CodeID) {
default:return nullptr;
STRINGIFY_CODE(METADATA, STRING)
STRINGIFY_CODE(METADATA, STRING_OLD)
STRINGIFY_CODE(METADATA, BULK_STRING_SIZES)
STRINGIFY_CODE(METADATA, BULK_STRING_DATA)
STRINGIFY_CODE(METADATA, NAME)
STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
STRINGIFY_CODE(METADATA, NODE)