1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

GSYM: add encoding and decoding to FunctionInfo

This patch adds encoding and decoding of the FunctionInfo objects along with full error handling and tests. Full details of the FunctionInfo encoding format appear in the FunctionInfo.h header file.

Differential Revision: https://reviews.llvm.org/D67506

llvm-svn: 372135
This commit is contained in:
Greg Clayton 2019-09-17 16:15:49 +00:00
parent b62ff560b1
commit d428cdddb3
3 changed files with 403 additions and 36 deletions

View File

@ -21,15 +21,68 @@ namespace llvm {
class raw_ostream;
namespace gsym {
/// Function information in GSYM files encodes information for one
/// contiguous address range. The name of the function is encoded as
/// a string table offset and allows multiple functions with the same
/// name to share the name string in the string table. Line tables are
/// stored in a sorted vector of gsym::LineEntry objects and are split
/// into line tables for each function. If a function has a discontiguous
/// range, it will be split into two gsym::FunctionInfo objects. If the
/// function has inline functions, the information will be encoded in
/// the "Inline" member, see gsym::InlineInfo for more information.
/// Function information in GSYM files encodes information for one contiguous
/// address range. If a function has discontiguous address ranges, they will
/// need to be encoded using multiple FunctionInfo objects.
///
/// ENCODING
///
/// The function information gets the function start address as an argument
/// to the FunctionInfo::decode(...) function. This information is calculated
/// from the GSYM header and an address offset from the GSYM address offsets
/// table. The encoded FunctionInfo information must be aligned to a 4 byte
/// boundary.
///
/// The encoded data for a FunctionInfo starts with fixed data that all
/// function info objects have:
///
/// ENCODING NAME DESCRIPTION
/// ========= =========== ====================================================
/// uint32_t Size The size in bytes of this function.
/// uint32_t Name The string table offset of the function name.
///
/// The optional data in a FunctionInfo object follows this fixed information
/// and consists of a stream of tuples that consist of:
///
/// ENCODING NAME DESCRIPTION
/// ========= =========== ====================================================
/// uint32_t InfoType An "InfoType" enumeration that describes the type
/// of optional data that is encoded.
/// uint32_t InfoLength The size in bytes of the encoded data that
/// immediately follows this length if this value is
/// greater than zero.
/// uint8_t[] InfoData Encoded bytes that represent the data for the
/// "InfoType". These bytes are only present if
/// "InfoLength" is greater than zero.
///
/// The "InfoType" is an enumeration:
///
/// enum InfoType {
/// EndOfList = 0u,
/// LineTableInfo = 1u,
/// InlineInfo = 2u
/// };
///
/// This stream of tuples is terminated by an "InfoType" whose value is
/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
/// the optional information list. This format allows us to add new optional
/// information data to a FunctionInfo object over time and allows older
/// clients to still parse the format and skip over any data that they don't
/// understand or want to parse.
///
/// So the function information encoding essentially looks like:
///
/// struct {
/// uint32_t Size;
/// uint32_t Name;
/// struct {
/// uint32_t InfoType;
/// uint32_t InfoLength;
/// uint8_t InfoData[InfoLength];
/// }[N];
/// }
///
/// Where "N" is the number of tuples.
struct FunctionInfo {
AddressRange Range;
uint32_t Name; ///< String table offset in the string table.
@ -39,23 +92,54 @@ struct FunctionInfo {
/// Construct a FunctionInfo whose range starts at \a Addr and ends at
/// \a Addr + \a Size, and whose name is the string table offset \a N. All
/// arguments default to zero.
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
: Range(Addr, Addr + Size), Name(N) {}
/// Query if a FunctionInfo has rich debug info.
///
/// \returns A bool that indicates if this object has something else than
/// range and name. When converting information from a symbol table and from
/// debug info, we might end up with multiple FunctionInfo objects for the
/// same range and we need to be able to tell which one is the better object
/// to use.
bool hasRichInfo() const {
// "Rich" means a line table and/or inline info is attached, i.e. there is
// more here than just the address range and the name. This is what lets a
// caller pick the better of two FunctionInfo objects for the same range.
return OptLineTable.hasValue() || Inline.hasValue();
}
/// Query if a FunctionInfo object is valid.
///
/// Address and size can be zero and there can be no line entries for a
/// symbol so the only indication this entry is valid is if the name is
/// not zero. This can happen when extracting information from symbol
/// tables that do not encode symbol sizes. In that case only the
/// address and name will be filled in.
///
/// \returns A boolean indicating if this FunctionInfo is valid.
bool isValid() const {
// A non-zero Name is the only marker of a valid entry: the address and size
// may be zero and there may be no line entries, e.g. for symbols taken from
// a symbol table that does not encode symbol sizes.
return Name != 0;
}
/// Decode an object from a binary data stream.
///
/// \param Data The binary stream to read the data from. This object must
/// have the data for the object starting at offset zero. The data
/// can contain more data than needed.
///
/// \param BaseAddr The FunctionInfo's start address and will be used as the
/// base address when decoding any contained information like the line table
/// and the inline info.
///
/// \returns A FunctionInfo or an error describing the issue that was
/// encountered during decoding.
static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
uint64_t BaseAddr);
/// Encode this object into FileWriter stream.
///
/// \param O The binary stream to write the data to at the current file
/// position.
///
/// \returns An error object that indicates failure or the offset of the
/// function info that was successfully written into the stream.
llvm::Expected<uint64_t> encode(FileWriter &O) const;
/// \returns The start address of this function's address range.
uint64_t startAddress() const { return Range.Start; }
/// \returns The end address of this function's address range.
uint64_t endAddress() const { return Range.End; }
/// \returns The size of this function's address range, as reported by
/// Range.size().
uint64_t size() const { return Range.size(); }

View File

@ -1,19 +1,147 @@
//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/Support/DataExtractor.h"
using namespace llvm;
using namespace gsym;
/// FunctionInfo information type that is used to encode the optional data
/// that is associated with a FunctionInfo object.
enum InfoType : uint32_t {
// Terminates the list of optional data entries.
EndOfList = 0u,
// The entry's data is decoded/encoded as a LineTable.
LineTableInfo = 1u,
// The entry's data is decoded/encoded as an InlineInfo.
InlineInfo = 2u
};
/// Stream \a FI to \a OS: the address range as "[start-end): ", then
/// "Name=" with the name's string table offset and a newline, followed by
/// whatever streaming the optional line table and inline info produces.
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
  // Address range.
  OS << '[' << HEX64(FI.Range.Start);
  OS << '-' << HEX64(FI.Range.End);
  OS << "): ";
  // Name offset, terminated by a newline.
  OS << "Name=" << HEX32(FI.Name) << '\n';
  // Optional payloads.
  OS << FI.OptLineTable;
  OS << FI.Inline;
  return OS;
}
/// Decode a FunctionInfo from \a Data, which must hold the encoded object
/// starting at offset zero.
///
/// The fixed Size and Name fields are read first. After that, tuples of
/// (InfoType, InfoLength, InfoData) are consumed until an EndOfList entry is
/// seen. \a BaseAddr becomes the start of the range and is also handed to the
/// LineTable and InlineInfo decoders.
///
/// \returns The decoded FunctionInfo, or an error when a field is missing,
/// the Name is zero, an InfoType is not supported, or a nested decode fails.
llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
                                                  uint64_t BaseAddr) {
  uint64_t Offset = 0;
  // Every fixed-width field is four bytes wide; this checks that the next one
  // is fully present at the current offset before it is read.
  const auto HasU32 = [&]() {
    return Data.isValidOffsetForDataOfSize(Offset, 4);
  };

  FunctionInfo FI;
  FI.Range.Start = BaseAddr;

  if (!HasU32())
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset);
  FI.Range.End = FI.Range.Start + Data.getU32(&Offset);

  if (!HasU32())
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset);
  FI.Name = Data.getU32(&Offset);
  // A zero name marks an invalid FunctionInfo; report the offset of the Name
  // field itself, which was just consumed.
  if (FI.Name == 0)
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x",
        Offset - 4, FI.Name);

  for (;;) {
    if (!HasU32())
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset);
    const uint32_t IT = Data.getU32(&Offset);

    if (!HasU32())
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset);
    const uint32_t InfoLength = Data.getU32(&Offset);

    if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength))
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
          Offset, IT);

    // Restrict the nested decoders to exactly this entry's bytes.
    DataExtractor InfoData(Data.getData().substr(Offset, InfoLength),
                           Data.isLittleEndian(),
                           Data.getAddressSize());

    if (IT == InfoType::EndOfList)
      break;

    if (IT == InfoType::LineTableInfo) {
      Expected<LineTable> LT = LineTable::decode(InfoData, BaseAddr);
      if (!LT)
        return LT.takeError();
      FI.OptLineTable = std::move(LT.get());
    } else if (IT == InfoType::InlineInfo) {
      Expected<InlineInfo> II = InlineInfo::decode(InfoData, BaseAddr);
      if (!II)
        return II.takeError();
      FI.Inline = std::move(II.get());
    } else {
      // Offset is past the type and length words; step back to the type.
      return createStringError(std::errc::io_error,
                               "0x%8.8" PRIx64 ": unsupported InfoType %u",
                               Offset-8, IT);
    }

    // Advance past this entry's data to the next tuple.
    Offset += InfoLength;
  }
  return std::move(FI);
}
/// Encode this FunctionInfo into \a O at the current file position.
///
/// The output is first aligned to a 4 byte boundary. The function size and
/// name are then written, followed by one (InfoType, InfoLength, InfoData)
/// entry for each of the line table and inline info that is present, and
/// finally an EndOfList entry with a zero length.
///
/// \returns The offset in \a O at which the FunctionInfo was written, or an
/// error if this object is not valid, if encoding the line table or inline
/// info fails, or if either of them is larger than UINT32_MAX bytes.
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
  if (!isValid())
    return createStringError(std::errc::invalid_argument,
        "attempted to encode invalid FunctionInfo object");
  // Align FunctionInfo data to a 4 byte alignment.
  O.alignTo(4);
  const uint64_t FuncInfoOffset = O.tell();
  // Write the size in bytes of this function as a uint32_t. This can be zero
  // if we just have a symbol from a symbol table and that symbol has no size.
  O.writeU32(size());
  // Write the name of this function as a uint32_t string table offset.
  O.writeU32(Name);

  // Write out the line table if we have one.
  if (OptLineTable.hasValue()) {
    O.writeU32(InfoType::LineTableInfo);
    // Write a uint32_t length as zero for now, we will fix this up after
    // writing the LineTable out with the number of bytes that were written.
    O.writeU32(0);
    const uint64_t StartOffset = O.tell();
    llvm::Error err = OptLineTable->encode(O, Range.Start);
    if (err)
      return std::move(err);
    // Keep the length in a fixed-width unsigned type. off_t is a POSIX type
    // that is not available everywhere and may be only 32 bits wide, in which
    // case the range check below could never fire.
    const uint64_t Length = O.tell() - StartOffset;
    if (Length > UINT32_MAX)
      return createStringError(std::errc::invalid_argument,
          "LineTable length is greater than UINT32_MAX");
    // Fixup the size of the LineTable data with the correct size. The length
    // field is the uint32_t immediately before the data.
    O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
  }

  // Write out the inline function info if we have any.
  if (Inline.hasValue()) {
    O.writeU32(InfoType::InlineInfo);
    // Write a uint32_t length as zero for now, we will fix this up after
    // writing the InlineInfo out with the number of bytes that were written.
    O.writeU32(0);
    const uint64_t StartOffset = O.tell();
    llvm::Error err = Inline->encode(O, Range.Start);
    if (err)
      return std::move(err);
    // See the line table case above for why this is not an off_t.
    const uint64_t Length = O.tell() - StartOffset;
    if (Length > UINT32_MAX)
      return createStringError(std::errc::invalid_argument,
          "InlineInfo length is greater than UINT32_MAX");
    // Fixup the size of the InlineInfo data with the correct size.
    O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
  }

  // Terminate the data chunks with an end of list entry with zero size.
  O.writeU32(InfoType::EndOfList);
  O.writeU32(0);
  return FuncInfoOffset;
}

View File

@ -24,6 +24,23 @@
using namespace llvm;
using namespace gsym;
/// Verify that \a Err holds a failure and that the messages of the errors it
/// contains match \a ExpectedMsgs one for one, in order.
void checkError(ArrayRef<std::string> ExpectedMsgs, Error Err) {
// The caller is expected to pass an error, not success.
ASSERT_TRUE(bool(Err));
size_t WhichMsg = 0;
Error Remaining =
handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) {
// Fail if more errors are reported than messages were expected.
ASSERT_LT(WhichMsg, ExpectedMsgs.size());
// Compare the reported message with the next expected one, then advance.
// NOTE(review): an earlier comment here referred to calling .str(), but
// no such call is made in this code.
EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]);
});
// Every expected message must have been matched and no error may be left
// unhandled.
EXPECT_EQ(WhichMsg, ExpectedMsgs.size());
EXPECT_FALSE(Remaining);
}
/// Convenience overload of checkError() for the common case where exactly one
/// error message is expected.
void checkError(std::string ExpectedMsg, Error Err) {
checkError(ArrayRef<std::string>{ExpectedMsg}, std::move(Err));
}
TEST(GSYMTest, TestFileEntry) {
// Make sure default constructed GSYM FileEntry has zeroes in the
// directory and basename string table indexes.
@ -137,22 +154,160 @@ TEST(GSYMTest, TestFunctionInfo) {
EXPECT_LT(FIWithLines, FIWithLinesWithHigherAddress);
}
void checkError(ArrayRef<std::string> ExpectedMsgs, Error Err) {
ASSERT_TRUE(Err.operator bool());
size_t WhichMsg = 0;
Error Remaining =
handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) {
ASSERT_LT(WhichMsg, ExpectedMsgs.size());
// Use .str(), because googletest doesn't visualise a StringRef
// properly.
EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]);
});
EXPECT_EQ(WhichMsg, ExpectedMsgs.size());
EXPECT_FALSE(Remaining);
/// Decode \a Bytes as a FunctionInfo based at \a BaseAddr and verify that
/// decoding fails with exactly \a ExpectedErrorMsg.
static void TestFunctionInfoDecodeError(llvm::support::endianness ByteOrder,
std::string Bytes,
const uint64_t BaseAddr,
std::string ExpectedErrorMsg) {
uint8_t AddressSize = 4;
DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
llvm::Expected<FunctionInfo> Decoded = FunctionInfo::decode(Data, BaseAddr);
// Make sure decoding fails.
ASSERT_FALSE((bool)Decoded);
// Make sure the reported error message is the one we expect.
checkError(ExpectedErrorMsg, Decoded.takeError());
}
void checkError(std::string ExpectedMsg, Error Err) {
checkError(ArrayRef<std::string>{ExpectedMsg}, std::move(Err));
TEST(GSYMTest, TestFunctionInfoDecodeErrors) {
// Test decoding FunctionInfo objects that ensure we report an appropriate
// error message. The encoded stream is built up one field at a time and
// decoded after each step, so every decode hits the next missing or invalid
// field.
const llvm::support::endianness ByteOrder = llvm::support::little;
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
FileWriter FW(OutStrm, ByteOrder);
const uint64_t BaseAddr = 0x100;
// Nothing has been written yet, so even the Size field is missing.
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x00000000: missing FunctionInfo Size");
FW.writeU32(0x100); // Function size.
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x00000004: missing FunctionInfo Name");
// Write out an invalid Name string table offset of zero.
FW.writeU32(0);
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x00000004: invalid FunctionInfo Name value 0x00000000");
// Modify the Name to be 0x00000001, which is a valid value.
FW.fixup32(0x00000001, 4);
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x00000008: missing FunctionInfo InfoType value");
// Remember where the InfoType is written so it can be overwritten below.
auto FixupOffset = FW.tell();
FW.writeU32(1); // InfoType::LineTableInfo.
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x0000000c: missing FunctionInfo InfoType length");
FW.fixup32(4, FixupOffset); // Write an invalid InfoType enumeration value
FW.writeU32(0); // InfoLength of zero for the (now invalid) InfoType.
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
"0x00000008: unsupported InfoType 4");
}
/// Encode \a FI into a scratch buffer using \a ByteOrder and verify that
/// encoding fails with exactly \a ExpectedErrorMsg.
static void TestFunctionInfoEncodeError(llvm::support::endianness ByteOrder,
                                        const FunctionInfo &FI,
                                        std::string ExpectedErrorMsg) {
  SmallString<512> Buffer;
  raw_svector_ostream Stream(Buffer);
  FileWriter Writer(Stream, ByteOrder);
  Expected<uint64_t> EncodedOffset = FI.encode(Writer);
  // Encoding must not succeed.
  ASSERT_FALSE(EncodedOffset);
  // The failure must carry the expected message.
  checkError(ExpectedErrorMsg, EncodedOffset.takeError());
}
TEST(GSYMTest, TestFunctionInfoEncodeErrors) {
// Verify that FunctionInfo::encode() reports the expected error message for
// objects that cannot be encoded.
const uint64_t FuncAddr = 0x1000;
const uint64_t FuncSize = 0x100;
const uint32_t InvalidName = 0;
const uint32_t ValidName = 1;
// A name of zero makes the FunctionInfo itself invalid.
FunctionInfo InvalidNameFI(FuncAddr, FuncSize, InvalidName);
TestFunctionInfoEncodeError(llvm::support::little, InvalidNameFI,
"attempted to encode invalid FunctionInfo object");
FunctionInfo InvalidLineTableFI(FuncAddr, FuncSize, ValidName);
// Empty line tables are not valid. Verify that if the encoding of anything
// in our line table fails, we get the error propagated.
InvalidLineTableFI.OptLineTable = LineTable();
TestFunctionInfoEncodeError(llvm::support::little, InvalidLineTableFI,
"attempted to encode invalid LineTable object");
FunctionInfo InvalidInlineInfoFI(FuncAddr, FuncSize, ValidName);
// A default constructed InlineInfo is expected to be rejected when encoded.
// Verify that if the encoding of our inline info fails, we get the error
// propagated.
InvalidInlineInfoFI.Inline = InlineInfo();
TestFunctionInfoEncodeError(llvm::support::little, InvalidInlineInfoFI,
"attempted to encode invalid InlineInfo object");
}
/// Round-trip \a FI through FunctionInfo::encode() and FunctionInfo::decode()
/// using \a ByteOrder and verify the decoded object equals the original.
static void TestFunctionInfoEncodeDecode(llvm::support::endianness ByteOrder,
                                         const FunctionInfo &FI) {
  // Encode into a scratch buffer.
  SmallString<512> Buffer;
  raw_svector_ostream Stream(Buffer);
  FileWriter Writer(Stream, ByteOrder);
  llvm::Expected<uint64_t> EncodedOffset = FI.encode(Writer);
  ASSERT_TRUE(bool(EncodedOffset));
  // The buffer started out empty, so the object must have landed at offset 0.
  ASSERT_EQ(EncodedOffset.get(), 0ULL);

  // Decode the bytes that were just written, based at the original start
  // address.
  std::string Bytes(Stream.str());
  const bool IsLittleEndian = ByteOrder == llvm::support::little;
  uint8_t AddressSize = 4;
  DataExtractor Data(Bytes, IsLittleEndian, AddressSize);
  llvm::Expected<FunctionInfo> Decoded =
      FunctionInfo::decode(Data, FI.Range.Start);
  // Make sure decoding succeeded.
  ASSERT_TRUE((bool)Decoded);
  // The round-tripped object must compare equal to the input.
  EXPECT_EQ(FI, Decoded.get());
}
TEST(GSYMTest, TestFunctionInfoEncoding) {
// Round-trip FunctionInfo objects with every combination of optional data
// (none, line table only, inline info only, both) in both byte orders.
constexpr uint64_t FuncAddr = 0x1000;
constexpr uint64_t FuncSize = 0x100;
constexpr uint32_t FuncName = 1;
constexpr uint32_t FileIdx = 1;
// Make sure that we can encode and decode a FunctionInfo with no line table
// or inline info.
FunctionInfo FI(FuncAddr, FuncSize, FuncName);
TestFunctionInfoEncodeDecode(llvm::support::little, FI);
TestFunctionInfoEncodeDecode(llvm::support::big, FI);
// Attaches a three entry line table to a FunctionInfo.
auto AddLinesLambda = [](FunctionInfo &FI) {
FI.OptLineTable = LineTable();
LineEntry Line0(FuncAddr+0x000, FileIdx, 10);
LineEntry Line1(FuncAddr+0x010, FileIdx, 11);
LineEntry Line2(FuncAddr+0x100, FileIdx, 1000);
FI.OptLineTable->push(Line0);
FI.OptLineTable->push(Line1);
FI.OptLineTable->push(Line2);
};
// Attaches inline info that spans the whole function and has one child.
auto AddInlineLambda = [](FunctionInfo &FI) {
FI.Inline = InlineInfo();
FI.Inline->Ranges.insert(AddressRange(FuncAddr, FuncAddr+FuncSize));
InlineInfo Inline1;
Inline1.Ranges.insert(AddressRange(FuncAddr+0x10, FuncAddr+0x30));
Inline1.Name = 1;
Inline1.CallFile = 1;
Inline1.CallLine = 11;
FI.Inline->Children.push_back(Inline1);
};
// Make sure that we can encode and decode a FunctionInfo with a line table
// and no inline info.
FunctionInfo FILines(FuncAddr, FuncSize, FuncName);
AddLinesLambda(FILines);
TestFunctionInfoEncodeDecode(llvm::support::little, FILines);
TestFunctionInfoEncodeDecode(llvm::support::big, FILines);
// Make sure that we can encode and decode a FunctionInfo with no line table
// and with inline info.
FunctionInfo FIInline(FuncAddr, FuncSize, FuncName);
AddInlineLambda(FIInline);
TestFunctionInfoEncodeDecode(llvm::support::little, FIInline);
TestFunctionInfoEncodeDecode(llvm::support::big, FIInline);
// Make sure that we can encode and decode a FunctionInfo with both a line
// table and inline info.
FunctionInfo FIBoth(FuncAddr, FuncSize, FuncName);
AddLinesLambda(FIBoth);
AddInlineLambda(FIBoth);
TestFunctionInfoEncodeDecode(llvm::support::little, FIBoth);
TestFunctionInfoEncodeDecode(llvm::support::big, FIBoth);
}
static void TestInlineInfoEncodeDecode(llvm::support::endianness ByteOrder,