[Remarks] Add string deduplication using a string table

* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section. * Add parsing support for the string table in the RemarkParser. From this remark: ``` --- !Missed Pass: inline Name: NoDefinition DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c', Line: 7, Column: 3 } Function: printArgsNoRet Args: - Callee: printf - String: ' will not be inlined into ' - Caller: printArgsNoRet DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c', Line: 6, Column: 0 } - String: ' because its definition is unavailable' ... ``` to: ``` --- !Missed Pass: 0 Name: 1 DebugLoc: { File: 3, Line: 7, Column: 3 } Function: 2 Args: - Callee: 4 - String: 5 - Caller: 2 DebugLoc: { File: 3, Line: 6, Column: 0 } - String: 6 ... ``` And the string table in the .remarks/__remarks section containing: ``` inline\0NoDefinition\0printArgsNoRet\0 test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0 will not be inlined into \0 because its definition is unavailable\0 ``` This is mostly supposed to be used for testing purposes, but it gives us a 2x reduction in the remark size, and is an incremental change for the updates to the remarks file format. Differential Revision: https://reviews.llvm.org/D60227 llvm-svn: 359050
2025-01-31 12:41:49 +01:00 · 2019-04-24 00:06:24 +00:00 · 2019-04-24 00:06:24 +00:00 · 83f4e4b65a
commit 83f4e4b65a
parent 5f651d5aba
19 changed files with 456 additions and 21 deletions
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@ -1597,6 +1597,10 @@ A section containing metadata on remark diagnostics will be emitted when

 * a magic number: "REMARKS\0"
 * the version number: a little-endian uint64_t
+* the string table:
+
+  * the total size of the string table (the size itself excluded):
+    little-endian uint64_t
+  * a list of null-terminated strings
+
 * the absolute file path to the serialized remark diagnostics: a
  null-terminated string.

--- a/include/llvm/IR/RemarkStreamer.h
+++ b/include/llvm/IR/RemarkStreamer.h
@ -14,10 +14,11 @@
 #define LLVM_IR_REMARKSTREAMER_H

 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Regex.h"
 #include <string>
 #include <vector>

@ -34,6 +35,11 @@ class RemarkStreamer {
  /// The YAML streamer.
  yaml::Output YAMLOutput;

+  /// The string table containing all the unique strings used in the output.
+  /// The table will be serialized in a section to be consumed after the
+  /// compilation.
+  remarks::StringTable StrTab;
+
 public:
  RemarkStreamer(StringRef Filename, raw_ostream& OS);
  /// Return the filename that the remark diagnostics are emitted to.
@ -45,6 +51,9 @@ public:
  Error setFilter(StringRef Filter);
  /// Emit a diagnostic through the streamer.
  void emit(const DiagnosticInfoOptimizationBase &Diag);
+  /// The string table used during emission.
+  remarks::StringTable &getStringTable() { return StrTab; }
+  const remarks::StringTable &getStringTable() const { return StrTab; }
 };
 } // end namespace llvm

--- a/include/llvm/Remarks/RemarkParser.h
+++ b/include/llvm/Remarks/RemarkParser.h
@ -13,6 +13,7 @@
 #ifndef LLVM_REMARKS_REMARK_PARSER_H
 #define LLVM_REMARKS_REMARK_PARSER_H

+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Remarks/Remark.h"
 #include "llvm/Support/Error.h"
@ -32,6 +33,11 @@ struct Parser {
  /// This constructor should be only used for parsing YAML remarks.
  Parser(StringRef Buffer);

+  /// Create a parser parsing \p Buffer to Remark objects, using \p StrTabBuf as
+  /// string table.
+  /// This constructor should be only used for parsing YAML remarks.
+  Parser(StringRef Buffer, StringRef StrTabBuf);
+
  // Needed because ParserImpl is an incomplete type.
  ~Parser();

@ -40,6 +46,18 @@ struct Parser {
  Expected<const Remark *> getNext() const;
 };

+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+/// The strings are not copied: the table only records where each
+/// '\0'-separated entry starts inside the buffer it was built from.
+struct ParsedStringTable {
+  /// The buffer mapped from the section contents.
+  StringRef Buffer;
+  /// Collection of offsets in the buffer for each string entry.
+  SmallVector<size_t, 8> Offsets;
+
+  /// Return the string with the ID \p Index, or an error if \p Index does not
+  /// refer to an entry of the table.
+  Expected<StringRef> operator[](size_t Index);
+  /// Build the table by recording the offset of every '\0'-separated string
+  /// found in \p Buffer.
+  ParsedStringTable(StringRef Buffer);
+};
+
 } // end namespace remarks
 } // end namespace llvm

--- a/include/llvm/Remarks/RemarkStringTable.h
+++ b/include/llvm/Remarks/RemarkStringTable.h
@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can be for example serialized in a section to be consumed after
+/// the compilation.
+struct StringTable {
+  /// Allocator holding all the memory used by the map.
+  BumpPtrAllocator Allocator;
+  /// The string table containing all the unique strings used in the output.
+  /// It maps a string to a unique ID.
+  StringMap<unsigned, BumpPtrAllocator &> StrTab;
+  /// Total size of the string table when serialized, in bytes. Every string
+  /// accounts for its length plus one byte for its '\0' terminator.
+  size_t SerializedSize = 0;
+
+  StringTable() : Allocator(), StrTab(Allocator) {}
+  /// Add a string to the table. Adding a string that is already in the table
+  /// does not create a new entry.
+  /// \returns a pair made of the unique ID of the string and a reference to
+  /// the copy of the string that is stored in the table.
+  std::pair<unsigned, StringRef> add(StringRef Str);
+  /// Serialize the string table to a stream. It is serialized as a little
+  /// endian uint64 (the size of the table in bytes) followed by a sequence of
+  /// NULL-terminated strings, where the N-th string is the string with the ID N
+  /// in the StrTab map.
+  void serialize(raw_ostream &OS) const;
+  /// Serialize the string table to a vector. This allows users to do the actual
+  /// writing to file/memory/other.
+  /// The string with the ID == N should be the N-th element in the vector.
+  std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@ -1905,6 +1905,11 @@ struct SequenceTraits<SmallVector<T, N>,
                      typename std::enable_if<CheckIsBool<
                          SequenceElementTraits<T>::flow>::value>::type>
    : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+// Also provide the traits for SmallVectorImpl<T>, so that a sequence that is
+// only reachable through a SmallVectorImpl<T> reference can be mapped too.
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+                      typename std::enable_if<CheckIsBool<
+                          SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};

 // Sequences of fundamental types use flow formatting.
 template <typename T>
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Module &M) {
  support::endian::write64le(Version.data(), remarks::Version);
  OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));

+  // Emit the string table in the section.
+  // Note: we need to use the streamer here to emit it in the section. We can't
+  // just use the serialize function with a raw_ostream because of the way
+  // MCStreamers work.
+  const remarks::StringTable &StrTab = RS->getStringTable();
+  std::vector<StringRef> StrTabStrings = StrTab.serialize();
+  uint64_t StrTabSize = StrTab.SerializedSize;
+  // Emit the total size of the string table (the size itself excluded):
+  // little-endian uint64_t.
+  // The total size is located after the version number.
+  std::array<char, 8> StrTabSizeBuf;
+  support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+  OutStreamer->EmitBinaryData(
+      StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+  // Emit a list of null-terminated strings.
+  // Note: the order is important here: the ID used in the remarks corresponds
+  // to the position of the string in the section.
+  for (StringRef Str : StrTabStrings) {
+    OutStreamer->EmitBytes(Str);
+    // Explicitly emit a '\0'.
+    OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+  }
+
  // Emit the null-terminated absolute path to the remark file.
  // The path is located at the offset 0x4 in the section.
  StringRef FilenameRef = RS->getFilename();
--- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@ -18,4 +18,4 @@
 type = Library
 name = AsmPrinter
 parent = Libraries
-required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@ -43,6 +43,8 @@

 using namespace llvm;

+/// Opt-in flag: when set, the strings of a remark are added to the remark
+/// streamer's string table and the YAML output refers to them by ID.
+cl::opt<bool> UseStringTable(
+    "remarks-yaml-string-table", cl::init(false),
+    cl::desc("Emit string table IDs instead of strings in YAML remarks"));
+
 int llvm::getNextAvailablePluginDiagnosticKind() {
  static std::atomic<int> PluginKindID(DK_FirstPluginKind);
  return ++PluginKindID;
@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}

+/// Map the fields shared by every remark entry, in the order they appear in
+/// the output: Pass, Name, DebugLoc, Function, Hotness and Args.
+/// \p T is the type used for the pass, remark and function names: the callers
+/// in this file pass either the strings themselves or their unsigned string
+/// table IDs.
+template <typename T>
+static void mapRemarkHeader(
+    yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
+    T FunctionName, Optional<uint64_t> Hotness,
+    SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
+  io.mapRequired("Pass", PassName);
+  io.mapRequired("Name", RemarkName);
+  // When outputting, the DebugLoc entry is skipped for invalid locations.
+  if (!io.outputting() || DL.isValid())
+    io.mapOptional("DebugLoc", DL);
+  io.mapRequired("Function", FunctionName);
+  io.mapOptional("Hotness", Hotness);
+  io.mapOptional("Args", Args);
+}
+
 namespace llvm {
 namespace yaml {

@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
      GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());

  StringRef PassName(OptDiag->PassName);
-  io.mapRequired("Pass", PassName);
-  io.mapRequired("Name", OptDiag->RemarkName);
-  if (!io.outputting() || DL.isValid())
-    io.mapOptional("DebugLoc", DL);
-  io.mapRequired("Function", FN);
-  io.mapOptional("Hotness", OptDiag->Hotness);
-  io.mapOptional("Args", OptDiag->Args);
+  if (UseStringTable) {
+    remarks::StringTable &StrTab =
+        reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+    unsigned PassID = StrTab.add(PassName).first;
+    unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
+    unsigned FunctionID = StrTab.add(FN).first;
+    mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
+                    OptDiag->Args);
+  } else {
+    mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
+                    OptDiag->Args);
+  }
 }

 template <> struct MappingTraits<DiagnosticLocation> {
@ -430,7 +451,15 @@ template <> struct MappingTraits<DiagnosticLocation> {
    unsigned Line = DL.getLine();
    unsigned Col = DL.getColumn();

-    io.mapRequired("File", File);
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      unsigned FileID = StrTab.add(File).first;
+      io.mapRequired("File", FileID);
+    } else {
+      io.mapRequired("File", File);
+    }
+
    io.mapRequired("Line", Line);
    io.mapRequired("Column", Col);
  }
@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<StringBlockVal> {
 template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
  static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
    assert(io.outputting() && "input not yet implemented");
-    // Emit a string block scalar for multiline strings, to preserve newlines.
-    if (StringRef(A.Val).count('\n') > 1) {
+
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      auto ValueID = StrTab.add(A.Val).first;
+      io.mapRequired(A.Key.data(), ValueID);
+    } else if (StringRef(A.Val).count('\n') > 1) {
      StringBlockVal S(A.Val);
      io.mapRequired(A.Key.data(), S);
-    } else
+    } else {
      io.mapRequired(A.Key.data(), A.Val);
+    }
    if (A.Loc.isValid())
      io.mapOptional("DebugLoc", A.Loc);
  }
--- a/lib/IR/LLVMBuild.txt
+++ b/lib/IR/LLVMBuild.txt
@ -18,4 +18,4 @@
 type = Library
 name = Core
 parent = Libraries
-required_libraries = BinaryFormat Support
+required_libraries = BinaryFormat Remarks Support
--- a/lib/IR/RemarkStreamer.cpp
+++ b/lib/IR/RemarkStreamer.cpp
@ -17,7 +17,7 @@ using namespace llvm;

 RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
    : Filename(Filename), OS(OS),
-      YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+      YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
  assert(!Filename.empty() && "This needs to be a real filename.");
 }

--- a/lib/Remarks/CMakeLists.txt
+++ b/lib/Remarks/CMakeLists.txt
@ -1,5 +1,6 @@
 add_llvm_library(LLVMRemarks
  Remark.cpp
  RemarkParser.cpp
+  RemarkStringTable.cpp
  YAMLRemarkParser.cpp
 )
--- a/lib/Remarks/RemarkParser.cpp
+++ b/lib/Remarks/RemarkParser.cpp
@ -22,6 +22,9 @@ using namespace llvm::remarks;

 Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {}

+// Create a YAML parser for Buf that is also handed StrTabBuf, the buffer
+// holding the string table.
+Parser::Parser(StringRef Buf, StringRef StrTabBuf)
+    : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {}
+
 Parser::~Parser() = default;

 static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext() const {
  llvm_unreachable("Get next called with an unknown parsing implementation.");
 }

+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+  // An entry starts at the beginning of the buffer and right after every '\0'
+  // separator, as long as there are bytes left to read.
+  size_t EntryStart = 0;
+  while (EntryStart < Buffer.size()) {
+    // Only the offset from the beginning of the buffer is stored.
+    Offsets.push_back(EntryStart);
+    size_t Separator = Buffer.find('\0', EntryStart);
+    // No separator left: this was the last entry.
+    if (Separator == StringRef::npos)
+      break;
+    EntryStart = Separator + 1;
+  }
+}
+
+/// Return the string with the ID \p Index.
+/// \returns an invalid_argument error if \p Index is not smaller than the
+/// number of entries in the table.
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) {
+  if (Index >= Offsets.size())
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        // The values are converted explicitly so that the arguments match the
+        // conversion specifiers: a size_t is not a valid argument for "%u".
+        "String with index %llu is out of bounds (size = %llu).",
+        static_cast<unsigned long long>(Index),
+        static_cast<unsigned long long>(Offsets.size()));
+
+  size_t Offset = Offsets[Index];
+  // Every entry but the last one is followed by its '\0' separator and then by
+  // the next entry: its length is the distance to the next offset, separator
+  // excluded.
+  if (Index + 1 < Offsets.size())
+    return StringRef(Buffer.data() + Offset, Offsets[Index + 1] - Offset - 1);
+
+  // The last entry runs up to the end of the buffer. Only drop the final byte
+  // if it really is a terminator, so that a buffer that does not end with a
+  // '\0' does not lose the last character of its last string.
+  StringRef Last = Buffer.substr(Offset);
+  if (!Last.empty() && Last.back() == '\0')
+    Last = Last.drop_back();
+  return Last;
+}
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)

--- a/lib/Remarks/RemarkStringTable.cpp
+++ b/lib/Remarks/RemarkStringTable.cpp
@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+/// Insert \p Str in the table if it is not there yet, and return its ID
+/// together with the copy of the string owned by the table.
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+  // A new string would get the next free ID: the current number of entries.
+  size_t CandidateID = StrTab.size();
+  auto Inserted = StrTab.insert({Str, CandidateID});
+  auto &Entry = *Inserted.first;
+  const bool IsNewString = Inserted.second;
+  // Only new strings grow the serialized table: length + 1 for the '\0'.
+  if (IsNewString)
+    SerializedSize += Entry.first().size() + 1;
+  // The ID is CandidateID for a new string, or the ID it was given before.
+  return {Entry.second, Entry.first()};
+}
+
+void StringTable::serialize(raw_ostream &OS) const {
+  // Emit the total size of the strings in bytes (terminators included) as a
+  // little-endian uint64.
+  uint64_t StrTabSize = SerializedSize;
+  support::endian::write(OS, StrTabSize, support::little);
+  // Emit the sequence of strings, ordered by ID.
+  for (StringRef Str : serialize()) {
+    OS << Str;
+    // Explicitly emit a '\0'.
+    OS.write('\0');
+  }
+}
+
+/// Return the strings of the table in a vector, where the element at the
+/// position N is the string with the ID N.
+std::vector<StringRef> StringTable::serialize() const {
+  // Parentheses on purpose: this is the "N default-constructed elements"
+  // constructor. With braces, that meaning would only hold as long as the
+  // initializer_list constructor is not viable for the argument.
+  std::vector<StringRef> Strings(StrTab.size());
+  // The map is not ordered by ID: place every string at the index of its ID.
+  for (const auto &KV : StrTab)
+    Strings[KV.second] = KV.first();
+  return Strings;
+}
--- a/lib/Remarks/YAMLRemarkParser.cpp
+++ b/lib/Remarks/YAMLRemarkParser.cpp
@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) {
  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
  if (!Value)
    return make_error<YAMLParseError>("expected a value of scalar type.", Node);
-  StringRef Tmp = Value->getRawValue();
+  StringRef Tmp;
+  if (!StrTab) {
+    Tmp = Value->getRawValue();
+  } else {
+    // If we have a string table, parse it as an unsigned.
+    unsigned StrID = 0;
+    if (Error E = parseUnsigned(StrID, Node))
+      return E;
+    if (Expected<StringRef> Str = (*StrTab)[StrID])
+      Tmp = *Str;
+    else
+      return Str.takeError();
+  }

  if (Tmp.front() == '\'')
    Tmp = Tmp.drop_front();
--- a/lib/Remarks/YAMLRemarkParser.h
+++ b/lib/Remarks/YAMLRemarkParser.h
@ -17,6 +17,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
@ -38,7 +39,8 @@ struct YAMLRemarkParser {
  raw_string_ostream ErrorStream;
  /// Temporary parsing buffer for the arguments.
  SmallVector<Argument, 8> TmpArgs;
-
+  /// The string table used for parsing strings.
+  Optional<ParsedStringTable> StrTab;
  /// The state used by the parser to parse a remark entry. Invalidated with
  /// every call to `parseYAMLElement`.
  struct ParseState {
@ -57,10 +59,13 @@ struct YAMLRemarkParser {
  /// not be containing any value.
  Optional<ParseState> State;

-  YAMLRemarkParser(StringRef Buf)
+  YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None)
      : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
-        TmpArgs() {
+        TmpArgs(), StrTab() {
    SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
+
+    if (StrTabBuf)
+      StrTab.emplace(*StrTabBuf);
  }

  /// Parse a YAML element.
@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImpl {
  /// Set to `true` if we had any errors during parsing.
  bool HasErrors = false;

-  YAMLParserImpl(StringRef Buf)
-      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf),
+  YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None)
+      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf),
        YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}

  static bool classof(const ParserImpl *PI) {
--- a/test/CodeGen/X86/remarks-section.ll
+++ b/test/CodeGen/X86/remarks-section.ll
@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -remarks-section -pass-remarks-output=%/t.yaml | FileCheck -DPATH=%/t.yaml %s
 ; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s
+; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -remarks-yaml-string-table -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s

 ; CHECK-LABEL: func1:

@ -11,6 +12,11 @@
 ; The version:
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
@ -24,10 +30,50 @@
 ; The version:
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-DARWIN-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
 ; CHECK-DARWIN-NEXT: .byte 0
+
+; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug
+; The magic number:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "REMARKS"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The version:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The size of the string table:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x71, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "prologepilog"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "StackSize"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "func1"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 48
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " stack bytes in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "asm-printer"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "InstructionCount"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 49
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " instructions in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The remark file path:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "[[PATH]]"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
 define void @func1() {
  ret void
 }
--- a/unittests/Remarks/CMakeLists.txt
+++ b/unittests/Remarks/CMakeLists.txt
@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS
  )

 add_llvm_unittest(RemarksTests
+  RemarksStrTabParsingTest.cpp
  YAMLRemarksParsingTest.cpp
  )
--- a/unittests/Remarks/RemarksStrTabParsingTest.cpp
+++ b/unittests/Remarks/RemarksStrTabParsingTest.cpp
@ -0,0 +1,39 @@
+//===- unittest/Support/RemarksStrTabParsingTest.cpp - StrTab tests -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+// A table built from an empty buffer has no entry: looking up the index 0 has
+// to report an out of bounds error.
+TEST(RemarksStrTab, ParsingEmpty) {
+  StringRef Empty("", 0);
+  remarks::ParsedStringTable StrTab(Empty);
+  Expected<StringRef> Nothing = StrTab[0];
+  EXPECT_FALSE(static_cast<bool>(Nothing));
+  EXPECT_EQ(toString(Nothing.takeError()),
+            "String with index 0 is out of bounds (size = 0).");
+}
+
+// Four entries, each looked up by its position in the buffer.
+TEST(RemarksStrTab, ParsingGood) {
+  // The explicit length (20) covers the 19 characters spelled out in the
+  // literal plus its implicit terminating '\0', so the last entry is
+  // null-terminated like the other ones.
+  StringRef Strings("str1\0str2\0str3\0str4", 20);
+  remarks::ParsedStringTable StrTab(Strings);
+  Expected<StringRef> Result = StrTab[0];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str1");
+  Result = StrTab[1];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str2");
+  Result = StrTab[2];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str3");
+  Result = StrTab[3];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str4");
+}
--- a/unittests/Remarks/YAMLRemarksParsingTest.cpp
+++ b/unittests/Remarks/YAMLRemarksParsingTest.cpp
@ -492,3 +492,105 @@ TEST(YAMLRemarks, ContentsCAPI) {
  EXPECT_FALSE(LLVMRemarkParserHasError(Parser));
  LLVMRemarkParserDispose(Parser);
 }
+
+// Parse a remark whose string values are IDs into a separate string table and
+// check that every field is resolved to the expected string.
+TEST(YAMLRemarks, ContentsStrTab) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 0\n"
+                  "Name: 1\n"
+                  "DebugLoc: { File: 2, Line: 3, Column: 12 }\n"
+                  "Function: 3\n"
+                  "Hotness: 4\n"
+                  "Args:\n"
+                  "  - Callee: 5\n"
+                  "  - String: 7\n"
+                  "  - Caller: 3\n"
+                  "    DebugLoc: { File: 2, Line: 2, Column: 0 }\n"
+                  "  - String: 8\n"
+                  "\n";
+
+  // The entries of the table, by ID:
+  //   0: "inline"  1: "NoDefinition"  2: "file.c"  3: "foo"  4: "Callee"
+  //   5: "bar"     6: "String"        7: " will not be inlined into "
+  //   8: " because its definition is unavailable"
+  // The explicit length (115) includes the implicit terminating '\0' of the
+  // literal, so the last entry is null-terminated as well.
+  StringRef StrTabBuf =
+      StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 "
+                "will not be inlined into \0 because its definition is "
+                "unavailable",
+                115);
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_TRUE(*RemarkOrErr != nullptr);
+
+  const remarks::Remark &Remark = **RemarkOrErr;
+  EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
+  EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
+  EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
+  EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo");
+  EXPECT_TRUE(Remark.Loc);
+  const remarks::RemarkLocation &RL = *Remark.Loc;
+  EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+  EXPECT_EQ(RL.SourceLine, 3U);
+  EXPECT_EQ(RL.SourceColumn, 12U);
+  EXPECT_TRUE(Remark.Hotness);
+  EXPECT_EQ(*Remark.Hotness, 4U);
+  EXPECT_EQ(Remark.Args.size(), 4U);
+
+  // Check the arguments in the order they appear in the remark.
+  unsigned ArgID = 0;
+  for (const remarks::Argument &Arg : Remark.Args) {
+    switch (ArgID) {
+    case 0:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Callee");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "bar");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 1:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into ");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 2: {
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Caller");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "foo");
+      EXPECT_TRUE(Arg.Loc);
+      const remarks::RemarkLocation &RL = *Arg.Loc;
+      EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+      EXPECT_EQ(RL.SourceLine, 2U);
+      EXPECT_EQ(RL.SourceColumn, 0U);
+      break;
+    }
+    case 3:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 38),
+                " because its definition is unavailable");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    default:
+      break;
+    }
+    ++ArgID;
+  }
+
+  // There is only one remark in the buffer: the next call yields a null
+  // remark and no error.
+  RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_EQ(*RemarkOrErr, nullptr);
+}
+
+// A string ID that is past the end of the string table has to be reported as
+// a parsing error.
+TEST(YAMLRemarks, ParsingBadStringTableIndex) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 50\n"
+                  "\n";
+
+  // A table with a single entry: the ID 50 used above is out of bounds.
+  StringRef StrTabBuf = StringRef("inline");
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> Remark = Parser.getNext();
+  EXPECT_FALSE(Remark); // Expect an error here.
+
+  // Log the error to a string to check the message it carries.
+  std::string ErrorStr;
+  raw_string_ostream Stream(ErrorStr);
+  handleAllErrors(Remark.takeError(),
+                  [&](const ErrorInfoBase &EIB) { EIB.log(Stream); });
+  EXPECT_TRUE(
+      StringRef(Stream.str())
+          .contains("String with index 50 is out of bounds (size = 1)."));
+}