From 26fcd8455c5e027d97b9b3fdbc9e27a87a605647 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 12 May 2016 17:45:44 +0000 Subject: [PATCH] Make CodeView record serialization more generic. This introduces a variadic template and some helper macros to safely and correctly deserialize many types of common record fields while maintaining error checking. Differential Revision: http://reviews.llvm.org/D20183 Reviewed By: rnk, amccarth llvm-svn: 269315 --- .../DebugInfo/CodeView/RecordSerialization.h | 205 +++++++++++++ include/llvm/DebugInfo/CodeView/TypeRecord.h | 270 ++++-------------- lib/DebugInfo/CodeView/CMakeLists.txt | 2 +- .../CodeView/RecordSerialization.cpp | 157 ++++++++++ lib/DebugInfo/CodeView/TypeStream.cpp | 97 ------- tools/llvm-readobj/COFFDumper.cpp | 25 +- 6 files changed, 426 insertions(+), 330 deletions(-) create mode 100644 include/llvm/DebugInfo/CodeView/RecordSerialization.h create mode 100644 lib/DebugInfo/CodeView/RecordSerialization.cpp delete mode 100644 lib/DebugInfo/CodeView/TypeStream.cpp diff --git a/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/include/llvm/DebugInfo/CodeView/RecordSerialization.h new file mode 100644 index 00000000000..4f429307f91 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/RecordSerialization.h @@ -0,0 +1,205 @@ +//===- RecordSerialization.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H +#define LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" +#include +#include + +namespace llvm { +namespace codeview { +using llvm::support::little32_t; +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; + +/// Reinterpret a byte array as an array of characters. Does not interpret as +/// a C string, as StringRef has several helpers (split) that make that easy. +StringRef getBytesAsCharacters(ArrayRef LeafData); +StringRef getBytesAsCString(ArrayRef LeafData); + +/// Consumes sizeof(T) bytes from the given byte sequence. Returns an error if +/// there are not enough bytes remaining. Reinterprets the consumed bytes as a +/// T object and points 'Res' at them. +template +inline std::error_code consumeObject(U &Data, const T *&Res) { + if (Data.size() < sizeof(*Res)) + return std::make_error_code(std::errc::illegal_byte_sequence); + Res = reinterpret_cast(Data.data()); + Data = Data.drop_front(sizeof(*Res)); + return std::error_code(); +} + +inline std::error_code consume(ArrayRef &Data) { + return std::error_code(); +} + +/// Decodes a numeric "leaf" value. These are integer literals encountered in +/// the type stream. If the value is positive and less than LF_NUMERIC (1 << +/// 15), it is emitted directly in Data. Otherwise, it has a tag like LF_CHAR +/// that indicates the bitwidth and sign of the numeric data. +std::error_code consume(ArrayRef &Data, APSInt &Num); +std::error_code consume(StringRef &Data, APSInt &Num); + +/// Decodes a numeric leaf value that is known to be a particular type. +std::error_code consume_numeric(ArrayRef &Data, uint64_t &Value); + +/// Decodes signed and unsigned fixed-length integers. 
+std::error_code consume(ArrayRef &Data, uint32_t &Item); +std::error_code consume(StringRef &Data, uint32_t &Item); +std::error_code consume(ArrayRef &Data, int32_t &Item); + +/// Decodes a null terminated string. +std::error_code consume(ArrayRef &Data, StringRef &Item); + +/// Decodes an arbitrary object whose layout matches that of the underlying +/// byte sequence, and returns a pointer to the object. +template +std::error_code consume(ArrayRef &Data, T *&Item) { + return consumeObject(Data, Item); +} + +template struct serialize_conditional_impl { + serialize_conditional_impl(T &Item, U Func) : Item(Item), Func(Func) {} + + std::error_code deserialize(ArrayRef &Data) const { + if (!Func()) + return std::error_code(); + return consume(Data, Item); + } + + T &Item; + U Func; +}; + +template +serialize_conditional_impl serialize_conditional(T &Item, U Func) { + return serialize_conditional_impl(Item, Func); +} + +template struct serialize_array_impl { + serialize_array_impl(ArrayRef &Item, U Func) : Item(Item), Func(Func) {} + + std::error_code deserialize(ArrayRef &Data) const { + uint32_t N = Func(); + if (N == 0) + return std::error_code(); + + uint32_t Size = sizeof(T) * N; + if (Data.size() < Size) + return std::make_error_code(std::errc::illegal_byte_sequence); + + Item = ArrayRef(reinterpret_cast(Data.data()), N); + Data = Data.drop_front(Size); + return std::error_code(); + } + + ArrayRef &Item; + U Func; +}; + +template struct serialize_array_tail_impl { + serialize_array_tail_impl(std::vector &Item) : Item(Item) {} + + std::error_code deserialize(ArrayRef &Data) const { + T Field; + while (!Data.empty()) { + if (auto EC = consume(Data, Field)) + return EC; + Item.push_back(Field); + } + return std::error_code(); + } + + std::vector &Item; +}; + +template struct serialize_numeric_impl { + serialize_numeric_impl(T &Item) : Item(Item) {} + + std::error_code deserialize(ArrayRef &Data) const { + return consume_numeric(Data, Item); + } + + T &Item; +}; + 
+template +serialize_array_impl serialize_array(ArrayRef &Item, U Func) { + return serialize_array_impl(Item, Func); +} + +template +serialize_array_tail_impl serialize_array_tail(std::vector &Item) { + return serialize_array_tail_impl(Item); +} + +template serialize_numeric_impl serialize_numeric(T &Item) { + return serialize_numeric_impl(Item); +} + +// This field is only present in the byte record if the condition is true. The +// condition is evaluated lazily, so it can depend on items that were +// deserialized +// earlier. +#define CV_CONDITIONAL_FIELD(I, C) \ + serialize_conditional(I, [&]() { return !!(C); }) + +// This is an array of N items, where N is evaluated lazily, so it can refer +// to a field deserialized earlier. +#define CV_ARRAY_FIELD_N(I, N) serialize_array(I, [&]() { return N; }) + +// This is an array that exhausts the remainder of the input buffer. +#define CV_ARRAY_FIELD_TAIL(I) serialize_array_tail(I) + +#define CV_NUMERIC_FIELD(I) serialize_numeric(I) + +template +std::error_code consume(ArrayRef &Data, + const serialize_conditional_impl &Item) { + return Item.deserialize(Data); +} + +template +std::error_code consume(ArrayRef &Data, + const serialize_array_impl &Item) { + return Item.deserialize(Data); +} + +template +std::error_code consume(ArrayRef &Data, + const serialize_array_tail_impl &Item) { + return Item.deserialize(Data); +} + +template +std::error_code consume(ArrayRef &Data, + const serialize_numeric_impl &Item) { + return Item.deserialize(Data); +} + +template +std::error_code consume(ArrayRef &Data, T &&X, U &&Y, + Args &&... Rest) { + if (auto EC = consume(Data, X)) + return EC; + return consume(Data, Y, std::forward(Rest)...); +} + +#define CV_DESERIALIZE(...) 
\ + if (auto EC = consume(__VA_ARGS__)) \ + return EC; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h index fa675c7af12..2fbbd44f86c 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -14,10 +14,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/ErrorOr.h" #include -#include namespace llvm { namespace codeview { @@ -26,79 +26,6 @@ using llvm::support::little32_t; using llvm::support::ulittle16_t; using llvm::support::ulittle32_t; -/// Decodes a numeric "leaf" value. These are integer literals encountered in -/// the type stream. If the value is positive and less than LF_NUMERIC (1 << -/// 15), it is emitted directly in Data. Otherwise, it has a tag like LF_CHAR -/// that indicates the bitwidth and sign of the numeric data. -bool decodeNumericLeaf(ArrayRef &Data, APSInt &Num); - -inline bool decodeNumericLeaf(StringRef &Data, APSInt &Num) { - ArrayRef Bytes(reinterpret_cast(Data.data()), - Data.size()); - bool Success = decodeNumericLeaf(Bytes, Num); - Data = StringRef(reinterpret_cast(Bytes.data()), Bytes.size()); - return Success; -} - -/// Decode a numeric leaf value that is known to be a uint32_t. -bool decodeUIntLeaf(ArrayRef &Data, uint64_t &Num); - -/// Reinterpret a byte array as an array of characters. Does not interpret as -/// a C string, as StringRef has several helpers (split) that make that easy. -inline StringRef getBytesAsCharacters(ArrayRef LeafData) { - return StringRef(reinterpret_cast(LeafData.data()), - LeafData.size()); -} - -inline StringRef getBytesAsCString(ArrayRef LeafData) { - return getBytesAsCharacters(LeafData).split('\0').first; -} - -/// Consumes sizeof(T) bytes from the given byte sequence. 
Returns an error if -/// there are not enough bytes remaining. Reinterprets the consumed bytes as a -/// T object and points 'Res' at them. -template -inline std::error_code consumeObject(U &Data, const T *&Res) { - if (Data.size() < sizeof(*Res)) - return std::make_error_code(std::errc::illegal_byte_sequence); - Res = reinterpret_cast(Data.data()); - Data = Data.drop_front(sizeof(*Res)); - return std::error_code(); -} - -inline std::error_code consumeCString(ArrayRef &Data, StringRef &Str) { - if (Data.empty()) - return std::make_error_code(std::errc::illegal_byte_sequence); - - StringRef Rest; - std::tie(Str, Rest) = getBytesAsCharacters(Data).split('\0'); - // We expect this to be null terminated. If it was not, it is an error. - if (Data.size() == Str.size()) - return std::make_error_code(std::errc::illegal_byte_sequence); - - Data = ArrayRef(Rest.bytes_begin(), Rest.bytes_end()); - return std::error_code(); -} - -template -inline std::error_code consumeArray(ArrayRef &Data, - ArrayRef &Result, uint32_t N) { - uint32_t Size = sizeof(T) * N; - if (Data.size() < Size) - return std::make_error_code(std::errc::illegal_byte_sequence); - - Result = ArrayRef(reinterpret_cast(Data.data()), N); - Data = Data.drop_front(Size); - return std::error_code(); -} - -inline std::error_code consumeUInt32(StringRef &Data, uint32_t &Res) { - const support::ulittle32_t *IntPtr; - if (auto EC = consumeObject(Data, IntPtr)) - return EC; - Res = *IntPtr; - return std::error_code(); -} /// Equvalent to CV_fldattr_t in cvinfo.h. 
struct MemberAttributes { @@ -278,8 +205,7 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + CV_DESERIALIZE(Data, L); return MemberFunctionRecord(L->ReturnType, L->ClassType, L->ThisType, L->CallConv, L->Options, L->NumParameters, @@ -328,12 +254,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); + return MemberFunctionIdRecord(L->ClassType, L->FunctionType, Name); } @@ -365,12 +288,9 @@ public: return std::make_error_code(std::errc::illegal_byte_sequence); const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - ArrayRef Indices; - if (auto EC = consumeArray(Data, Indices, L->NumArgs)) - return EC; + CV_DESERIALIZE(Data, L, CV_ARRAY_FIELD_N(Indices, L->NumArgs)); + return StringListRecord(Kind, Indices); } @@ -505,12 +425,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); + return NestedTypeRecord(L->Type, Name); } @@ -539,15 +456,10 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - uint64_t Size; - if (!decodeUIntLeaf(Data, Size)) - return std::make_error_code(std::errc::illegal_byte_sequence); StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name); + return ArrayRecord(L->ElementType, L->IndexType, Size, Name); } @@ -614,10 +526,10 @@ public: StringRef Name; StringRef UniqueName; uint16_t Props; - const Layout *L = nullptr; - 
if (auto EC = consumeObject(Data, L)) - return EC; + + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name, + CV_CONDITIONAL_FIELD(UniqueName, L->hasUniqueName())); Props = L->Properties; uint16_t WrtValue = (Props & WinRTKindMask) >> WinRTKindShift; @@ -625,15 +537,6 @@ public: uint16_t HfaMask = (Props & HfaKindMask) >> HfaKindShift; HfaKind Hfa = static_cast(HfaMask); - if (!decodeUIntLeaf(Data, Size)) - return std::make_error_code(std::errc::illegal_byte_sequence); - if (auto EC = consumeCString(Data, Name)) - return EC; - if (Props & uint16_t(ClassOptions::HasUniqueName)) { - if (auto EC = consumeCString(Data, UniqueName)) - return EC; - } - ClassOptions Options = static_cast(Props); return ClassRecord(Kind, L->MemberCount, Options, Hfa, WRT, L->FieldList, L->DerivedFrom, L->VShape, Size, Name, UniqueName); @@ -655,6 +558,10 @@ private: // SizeOf: The 'sizeof' the UDT in bytes is encoded as an LF_NUMERIC // integer. // Name: The null-terminated name follows. + + bool hasUniqueName() const { + return Properties & uint16_t(ClassOptions::HasUniqueName); + } }; HfaKind Hfa; @@ -681,23 +588,13 @@ struct UnionRecord : public TagRecord { uint16_t Props; const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name, + CV_CONDITIONAL_FIELD(UniqueName, L->hasUniqueName())); Props = L->Properties; uint16_t HfaMask = (Props & HfaKindMask) >> HfaKindShift; HfaKind Hfa = static_cast(HfaMask); - - if (!decodeUIntLeaf(Data, Size)) - return std::make_error_code(std::errc::illegal_byte_sequence); - if (auto EC = consumeCString(Data, Name)) - return EC; - if (Props & uint16_t(ClassOptions::HasUniqueName)) { - if (auto EC = consumeCString(Data, UniqueName)) - return EC; - } - ClassOptions Options = static_cast(Props); return UnionRecord(L->MemberCount, Options, Hfa, L->FieldList, Size, Name, UniqueName); @@ -714,6 +611,10 @@ private: // SizeOf: The 'sizeof' the UDT in bytes is encoded as an LF_NUMERIC // 
integer. // Name: The null-terminated name follows. + + bool hasUniqueName() const { + return Properties & uint16_t(ClassOptions::HasUniqueName); + } }; HfaKind Hfa; @@ -732,14 +633,11 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + StringRef Name; + CV_DESERIALIZE(Data, L, Name); uint16_t P = L->Properties; ClassOptions Options = static_cast(P); - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; return EnumRecord(L->NumEnumerators, Options, L->FieldListType, Name, Name, L->UnderlyingType); } @@ -839,12 +737,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); + return TypeServer2Record(StringRef(L->Guid, 16), L->Age, Name); } @@ -875,12 +770,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); + return StringIdRecord(L->id, Name); } @@ -908,12 +800,8 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); return FuncIdRecord(L->ParentScope, L->FunctionType, Name); } @@ -946,8 +834,7 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + CV_DESERIALIZE(Data, L); return UdtSourceLineRecord(L->UDT, L->SourceFile, L->LineNumber); } @@ -977,12 +864,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, 
ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - ArrayRef Indices; - if (auto EC = consumeArray(Data, Indices, L->NumArgs)) - return EC; + CV_DESERIALIZE(Data, L, CV_ARRAY_FIELD_N(Indices, L->NumArgs)); + return BuildInfoRecord(Indices); } @@ -1015,19 +899,10 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; - std::vector Names; - while (!Data.empty()) { - if (auto EC = consumeCString(Data, Name)) - return EC; - Names.push_back(Name); - } + CV_DESERIALIZE(Data, L, Name, CV_ARRAY_FIELD_TAIL(Names)); + return VirtualTableRecord(L->CompleteClass, L->OverriddenVFTable, L->VFPtrOffset, Name, Names); } @@ -1072,23 +947,16 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + StringRef Name; + int32_t VFTableOffset = 0; + + CV_DESERIALIZE(Data, L, CV_CONDITIONAL_FIELD( + VFTableOffset, L->Attrs.isIntroducedVirtual()), + Name); MethodOptions Options = L->Attrs.getFlags(); MethodKind MethKind = L->Attrs.getMethodKind(); MemberAccess Access = L->Attrs.getAccess(); - int32_t VFTableOffset = 0; - if (L->Attrs.isIntroducedVirtual()) { - const little32_t *L; - if (consumeObject(Data, L)) - return std::make_error_code(std::errc::illegal_byte_sequence); - VFTableOffset = *L; - } - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; - return OneMethodRecord(L->Type, MethKind, Options, Access, VFTableOffset, Name); } @@ -1135,19 +1003,13 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; + int32_t VFTableOffset = 0; + CV_DESERIALIZE(Data, L, CV_CONDITIONAL_FIELD( + VFTableOffset, L->Attrs.isIntroducedVirtual())); MethodOptions 
Options = L->Attrs.getFlags(); MethodKind MethKind = L->Attrs.getMethodKind(); MemberAccess Access = L->Attrs.getAccess(); - int32_t VFTableOffset = 0; - if (L->Attrs.isIntroducedVirtual()) { - const little32_t *L; - if (consumeObject(Data, L)) - return std::make_error_code(std::errc::illegal_byte_sequence); - VFTableOffset = *L; - } return MethodListRecord(L->Type, MethKind, Options, Access, VFTableOffset); } @@ -1186,11 +1048,8 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); return OverloadedMethodRecord(L->MethodCount, L->MethList, Name); } @@ -1222,15 +1081,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - uint64_t Offset; - if (!decodeUIntLeaf(Data, Offset)) - return std::make_error_code(std::errc::illegal_byte_sequence); StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset), Name); return DataMemberRecord(L->Attrs.getAccess(), L->Type, Offset, Name); } @@ -1264,12 +1117,8 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Name); return StaticDataMemberRecord(L->Attrs.getAccess(), L->Type, Name); } @@ -1300,18 +1149,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - - if (Data.empty()) - return std::make_error_code(std::errc::illegal_byte_sequence); APSInt Value; - if (!decodeNumericLeaf(Data, Value)) - return 
std::make_error_code(std::errc::illegal_byte_sequence); - StringRef Name; - if (auto EC = consumeCString(Data, Name)) - return EC; + CV_DESERIALIZE(Data, L, Value, Name); return EnumeratorRecord(L->Attrs.getAccess(), Value, Name); } @@ -1366,12 +1206,8 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - uint64_t Offset; - if (!decodeUIntLeaf(Data, Offset)) - return std::make_error_code(std::errc::illegal_byte_sequence); + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset)); return BaseClassRecord(L->Attrs.getAccess(), L->BaseType, Offset); } @@ -1403,15 +1239,9 @@ public: static ErrorOr deserialize(TypeRecordKind Kind, ArrayRef &Data) { const Layout *L = nullptr; - if (auto EC = consumeObject(Data, L)) - return EC; - uint64_t Offset; uint64_t Index; - if (!decodeUIntLeaf(Data, Offset)) - return std::make_error_code(std::errc::illegal_byte_sequence); - if (!decodeUIntLeaf(Data, Index)) - return std::make_error_code(std::errc::illegal_byte_sequence); + CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset), CV_NUMERIC_FIELD(Index)); return VirtualBaseClassRecord(L->Attrs.getAccess(), L->BaseType, L->VBPtrType, Offset, Index); diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 2be582f8c1a..cdca673474d 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -4,10 +4,10 @@ add_llvm_library(LLVMDebugInfoCodeView ListRecordBuilder.cpp MemoryTypeTableBuilder.cpp MethodListRecordBuilder.cpp + RecordSerialization.cpp TypeDumper.cpp TypeRecordBuilder.cpp TypeTableBuilder.cpp - TypeStream.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView diff --git a/lib/DebugInfo/CodeView/RecordSerialization.cpp b/lib/DebugInfo/CodeView/RecordSerialization.cpp new file mode 100644 index 00000000000..c142a01ef33 --- /dev/null +++ b/lib/DebugInfo/CodeView/RecordSerialization.cpp @@ 
-0,0 +1,157 @@ +//===-- RecordSerialization.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utilities for serializing and deserializing CodeView records. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::support; + +/// Reinterpret a byte array as an array of characters. Does not interpret as +/// a C string, as StringRef has several helpers (split) that make that easy. +StringRef llvm::codeview::getBytesAsCharacters(ArrayRef LeafData) { + return StringRef(reinterpret_cast(LeafData.data()), + LeafData.size()); +} + +StringRef llvm::codeview::getBytesAsCString(ArrayRef LeafData) { + return getBytesAsCharacters(LeafData).split('\0').first; +} + +std::error_code llvm::codeview::consume(ArrayRef &Data, APSInt &Num) { + // Used to avoid overload ambiguity on APInt constructor. 
+ bool FalseVal = false; + if (Data.size() < 2) + return std::make_error_code(std::errc::illegal_byte_sequence); + uint16_t Short = *reinterpret_cast(Data.data()); + Data = Data.drop_front(2); + if (Short < LF_NUMERIC) { + Num = APSInt(APInt(/*numBits=*/16, Short, /*isSigned=*/false), + /*isUnsigned=*/true); + return std::error_code(); + } + switch (Short) { + case LF_CHAR: + Num = APSInt(APInt(/*numBits=*/8, + *reinterpret_cast(Data.data()), + /*isSigned=*/true), + /*isUnsigned=*/false); + Data = Data.drop_front(1); + return std::error_code(); + case LF_SHORT: + Num = APSInt(APInt(/*numBits=*/16, + *reinterpret_cast(Data.data()), + /*isSigned=*/true), + /*isUnsigned=*/false); + Data = Data.drop_front(2); + return std::error_code(); + case LF_USHORT: + Num = APSInt(APInt(/*numBits=*/16, + *reinterpret_cast(Data.data()), + /*isSigned=*/false), + /*isUnsigned=*/true); + Data = Data.drop_front(2); + return std::error_code(); + case LF_LONG: + Num = APSInt(APInt(/*numBits=*/32, + *reinterpret_cast(Data.data()), + /*isSigned=*/true), + /*isUnsigned=*/false); + Data = Data.drop_front(4); + return std::error_code(); + case LF_ULONG: + Num = APSInt(APInt(/*numBits=*/32, + *reinterpret_cast(Data.data()), + /*isSigned=*/FalseVal), + /*isUnsigned=*/true); + Data = Data.drop_front(4); + return std::error_code(); + case LF_QUADWORD: + Num = APSInt(APInt(/*numBits=*/64, + *reinterpret_cast(Data.data()), + /*isSigned=*/true), + /*isUnsigned=*/false); + Data = Data.drop_front(8); + return std::error_code(); + case LF_UQUADWORD: + Num = APSInt(APInt(/*numBits=*/64, + *reinterpret_cast(Data.data()), + /*isSigned=*/false), + /*isUnsigned=*/true); + Data = Data.drop_front(8); + return std::error_code(); + } + return std::make_error_code(std::errc::illegal_byte_sequence); +} + +std::error_code llvm::codeview::consume(StringRef &Data, APSInt &Num) { + ArrayRef Bytes(Data.bytes_begin(), Data.bytes_end()); + auto EC = consume(Bytes, Num); + Data = StringRef(reinterpret_cast(Bytes.data()), 
Bytes.size()); + return EC; +} + +/// Decode a numeric leaf value that is known to be a uint64_t. +std::error_code llvm::codeview::consume_numeric(ArrayRef &Data, + uint64_t &Num) { + APSInt N; + if (auto EC = consume(Data, N)) + return EC; + if (N.isSigned() || !N.isIntN(64)) + return std::make_error_code(std::errc::illegal_byte_sequence); + Num = N.getLimitedValue(); + return std::error_code(); +} + +std::error_code llvm::codeview::consume(ArrayRef &Data, + uint32_t &Item) { + const support::ulittle32_t *IntPtr; + if (auto EC = consumeObject(Data, IntPtr)) + return EC; + Item = *IntPtr; + return std::error_code(); +} + +std::error_code llvm::codeview::consume(StringRef &Data, uint32_t &Item) { + ArrayRef Bytes(Data.bytes_begin(), Data.bytes_end()); + auto EC = consume(Bytes, Item); + Data = StringRef(reinterpret_cast(Bytes.data()), Bytes.size()); + return EC; +} + +std::error_code llvm::codeview::consume(ArrayRef &Data, + int32_t &Item) { + const support::little32_t *IntPtr; + if (auto EC = consumeObject(Data, IntPtr)) + return EC; + Item = *IntPtr; + return std::error_code(); +} + +std::error_code llvm::codeview::consume(ArrayRef &Data, + StringRef &Item) { + if (Data.empty()) + return std::make_error_code(std::errc::illegal_byte_sequence); + + StringRef Rest; + std::tie(Item, Rest) = getBytesAsCharacters(Data).split('\0'); + // We expect this to be null terminated. If it was not, it is an error. 
+ if (Data.size() == Item.size()) + return std::make_error_code(std::errc::illegal_byte_sequence); + + Data = ArrayRef(Rest.bytes_begin(), Rest.bytes_end()); + return std::error_code(); +} diff --git a/lib/DebugInfo/CodeView/TypeStream.cpp b/lib/DebugInfo/CodeView/TypeStream.cpp deleted file mode 100644 index 82d3f4d7e86..00000000000 --- a/lib/DebugInfo/CodeView/TypeStream.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//===-- TypeStream.cpp ----------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Utilities for parsing CodeView type streams. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/APSInt.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" - -using namespace llvm; -using namespace llvm::codeview; -using namespace llvm::support; - -bool llvm::codeview::decodeNumericLeaf(ArrayRef &Data, APSInt &Num) { - // Used to avoid overload ambiguity on APInt construtor. 
- bool FalseVal = false; - if (Data.size() < 2) - return false; - uint16_t Short = *reinterpret_cast(Data.data()); - Data = Data.drop_front(2); - if (Short < LF_NUMERIC) { - Num = APSInt(APInt(/*numBits=*/16, Short, /*isSigned=*/false), - /*isUnsigned=*/true); - return true; - } - switch (Short) { - case LF_CHAR: - Num = APSInt(APInt(/*numBits=*/8, - *reinterpret_cast(Data.data()), - /*isSigned=*/true), - /*isUnsigned=*/false); - Data = Data.drop_front(1); - return true; - case LF_SHORT: - Num = APSInt(APInt(/*numBits=*/16, - *reinterpret_cast(Data.data()), - /*isSigned=*/true), - /*isUnsigned=*/false); - Data = Data.drop_front(2); - return true; - case LF_USHORT: - Num = APSInt(APInt(/*numBits=*/16, - *reinterpret_cast(Data.data()), - /*isSigned=*/false), - /*isUnsigned=*/true); - Data = Data.drop_front(2); - return true; - case LF_LONG: - Num = APSInt(APInt(/*numBits=*/32, - *reinterpret_cast(Data.data()), - /*isSigned=*/true), - /*isUnsigned=*/false); - Data = Data.drop_front(4); - return true; - case LF_ULONG: - Num = APSInt(APInt(/*numBits=*/32, - *reinterpret_cast(Data.data()), - /*isSigned=*/FalseVal), - /*isUnsigned=*/true); - Data = Data.drop_front(4); - return true; - case LF_QUADWORD: - Num = APSInt(APInt(/*numBits=*/64, - *reinterpret_cast(Data.data()), - /*isSigned=*/true), - /*isUnsigned=*/false); - Data = Data.drop_front(8); - return true; - case LF_UQUADWORD: - Num = APSInt(APInt(/*numBits=*/64, - *reinterpret_cast(Data.data()), - /*isSigned=*/false), - /*isUnsigned=*/true); - Data = Data.drop_front(8); - return true; - } - return false; -} - -/// Decode a numeric leaf value that is known to be a uint32_t. 
-bool llvm::codeview::decodeUIntLeaf(ArrayRef &Data, uint64_t &Num) { - APSInt N; - if (!decodeNumericLeaf(Data, N)) - return false; - if (N.isSigned() || !N.isIntN(64)) - return false; - Num = N.getLimitedValue(); - return true; -} diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index ebb52aa2acd..6cb543b4251 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDumper.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" @@ -770,8 +771,8 @@ void COFFDumper::initializeFileAndStringTables(StringRef Data) { // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(consumeUInt32(Data, SubType)); - error(consumeUInt32(Data, SubSectionSize)); + error(consume(Data, SubType)); + error(consume(Data, SubSectionSize)); if (SubSectionSize > Data.size()) return error(object_error::parse_failed); switch (ModuleSubstreamKind(SubType)) { @@ -802,7 +803,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, W.printNumber("Section", SectionName, Obj->getSectionID(Section)); uint32_t Magic; - error(consumeUInt32(Data, Magic)); + error(consume(Data, Magic)); W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) return error(object_error::parse_failed); @@ -813,8 +814,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(consumeUInt32(Data, SubType)); - error(consumeUInt32(Data, SubSectionSize)); + error(consume(Data, 
SubType)); + error(consume(Data, SubSectionSize)); ListScope S(W, "Subsection"); W.printEnum("SubSectionType", SubType, makeArrayRef(SubSectionTypes)); @@ -1211,7 +1212,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, case S_CALLEES: { ListScope S(W, Kind == S_CALLEES ? "Callees" : "Callers"); uint32_t Count; - error(consumeUInt32(SymData, Count)); + error(consume(SymData, Count)); for (uint32_t I = 0; I < Count; ++I) { const TypeIndex *FuncID; error(consumeObject(SymData, FuncID)); @@ -1500,7 +1501,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, error(consumeObject(SymData, Constant)); printTypeIndex("Type", Constant->Type); APSInt Value; - if (!decodeNumericLeaf(SymData, Value)) + if (consume(SymData, Value)) error(object_error::parse_failed); W.printNumber("Value", Value); StringRef Name = SymData.split('\0').first; @@ -1551,7 +1552,7 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { StringRef Data = Subsection; uint32_t Signature; - error(consumeUInt32(Data, Signature)); + error(consume(Data, Signature)); bool HasExtraFiles = Signature == unsigned(InlineeLinesSignature::ExtraFiles); while (!Data.empty()) { @@ -1564,12 +1565,12 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { if (HasExtraFiles) { uint32_t ExtraFileCount; - error(consumeUInt32(Data, ExtraFileCount)); + error(consume(Data, ExtraFileCount)); W.printNumber("ExtraFileCount", ExtraFileCount); ListScope ExtraFiles(W, "ExtraFiles"); for (unsigned I = 0; I < ExtraFileCount; ++I) { uint32_t FileID; - error(consumeUInt32(Data, FileID)); + error(consume(Data, FileID)); printFileNameForOffset("FileID", FileID); } } @@ -1606,7 +1607,7 @@ StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) { // The string table offset comes first before the file checksum. 
StringRef Data = CVFileChecksumTable.drop_front(FileOffset); uint32_t StringOffset; - error(consumeUInt32(Data, StringOffset)); + error(consume(Data, StringOffset)); // Check if the string table offset is valid. if (StringOffset >= CVStringTable.size()) @@ -1631,7 +1632,7 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName, W.printBinaryBlock("Data", Data); uint32_t Magic; - error(consumeUInt32(Data, Magic)); + error(consume(Data, Magic)); W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) return error(object_error::parse_failed);