diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 00000000000..b7c8c6ee47c --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,250 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" + +using namespace llvm; + +const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { + switch (AT) { + default: llvm_unreachable("invalid AtomType!"); + case eAtomTypeNULL: return "eAtomTypeNULL"; + case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; + case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; + case eAtomTypeTag: return "eAtomTypeTag"; + case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; + case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; + } +} + +// The general case would need to have a less hard coded size for the +// length of the HeaderData, however, if we're constructing based on a +// single Atom then we know it will always be: 4 + 4 + 2 + 2. +DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : + Header(12), + HeaderData(atom) { +} + +void DwarfAccelTable::AddName(StringRef Name, DIE* die) { + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DIEArray &DIEs = Entries[Name]; + DIEs.push_back(die); +} + +void DwarfAccelTable::ComputeBucketCount(void) { + // First get the number of unique hashes. + std::vector uniques; + uniques.resize(Data.size()); + for (size_t i = 0; i < Data.size(); ++i) + uniques[i] = Data[i]->HashValue; + std::sort(uniques.begin(), uniques.end()); + std::vector::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) Header.bucket_count = num/4; + if (num > 16) Header.bucket_count = num/2; + else Header.bucket_count = num > 0 ? num : 1; + + Header.hashes_count = num; +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { + // Create the individual hash data outputs. + for (StringMap::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + struct HashData *Entry = new HashData((*EI).getKeyData()); + for (DIEArray::const_iterator DI = (*EI).second.begin(), + DE = (*EI).second.end(); + DI != DE; ++DI) + Entry->addOffset((*DI)->getOffset()); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0; i < Data.size(); ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. This will look +// like a list of numbers of how many elements are in each bucket. +void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0; i < Buckets.size(); ++i) { + Twine Comment = Twine("Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Twine Comment = Twine("Hash in Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Twine Comment = Twine("Offset in Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. + Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getDwarfStrSectionSym()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->DIEOffsets.size()); + for (std::vector::const_iterator + DI = (*HI)->DIEOffsets.begin(), DE = (*HI)->DIEOffsets.end(); + DI != DE; ++DI) { + Asm->EmitInt32((*DI)); + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. + EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << (*EI).getKeyData() << "\n"; + for (DIEArray::const_iterator DI = (*EI).second.begin(), + DE = (*EI).second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0; i < Buckets.size(); ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 00000000000..242841a5095 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,254 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include +#include + +// The apple dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (const char *s) { + uint32_t h = 5381; + for (unsigned char c = *s; c; c = *++s) + h = ((h << 5) + h) + c; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {}; + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + enum AtomType { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that + // contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as + // DW_FORM_data1 (if no tags exceed 255) or + // DW_FORM_data2. + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags + }; + + // Make these public so that they can be used as a general interface to + // the class. + struct Atom { + AtomType type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + Atom(AtomType type, uint16_t form) : type(type), form(form) {}; + static const char * AtomTypeString(enum AtomType); +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << dwarf::TagString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { + print(dbgs()); + } +#endif + }; + + private: + struct TableHeaderData { + + uint32_t die_offset_base; + std::vector Atoms; + + TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) + : die_offset_base(offset) { + Atoms.push_back(Atom); + } + +#ifndef NDEBUG + void print (raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { + print(dbgs()); + } +#endif + }; + + // The data itself consists of a str_offset (to deal with collisions in + // some magical way? this looks like the KeyType from the spec, which + // should mean an integer compare on read), a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. + // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + std::vector DIEOffsets; // offsets + HashData(StringRef S) : Str(S) { + HashValue = DwarfAccelTable::HashDJB(S.str().c_str()); + } + void addOffset(uint32_t off) { DIEOffsets.push_back(off); } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << ""; + O << "\n"; + for (size_t i = 0; i < DIEOffsets.size(); i++) + O << " Offset: " << DIEOffsets[i] << "\n"; + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT + void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector Data; + + // String Data + typedef std::vector DIEArray; + typedef StringMap StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector HashList; + typedef std::vector BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(DwarfAccelTable::Atom Atom); + void AddName(StringRef, DIE*); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { print(dbgs()); } +#endif +}; + +} +#endif