From 464d581c02d75a30d345f2f16dbd353dfdf6904d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 21 Mar 2018 19:06:06 +0000 Subject: [PATCH] [InstrProf] Support for external functions in text format. Summary: External functions appearing as indirect call targets could not be found in the SymTab, and the value:counter record was represented, in the text format, using an empty string for the name. This would then cause a silent parsing error when reading. This CL: - adds explicit support for such functions - fixes the places where we would not propagate errors when reading - addresses a performance issue due to eager resorting of the SymTab. Reviewers: xur, eraman, davidxl Reviewed By: davidxl Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D44717 llvm-svn: 328132 --- include/llvm/ProfileData/InstrProf.h | 45 ++++++++++++++--- include/llvm/ProfileData/InstrProfReader.h | 2 +- lib/ProfileData/InstrProf.cpp | 3 +- lib/ProfileData/InstrProfReader.cpp | 29 ++++++----- lib/ProfileData/InstrProfWriter.cpp | 4 +- .../tools/llvm-profdata/invalid-profdata.test | 50 +++++++++++++++++++ unittests/ProfileData/InstrProfTest.cpp | 3 -- 7 files changed, 105 insertions(+), 31 deletions(-) create mode 100644 test/tools/llvm-profdata/invalid-profdata.test diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index b08b78cd593..f15cf9a37f6 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -425,9 +425,20 @@ private: // A map from function runtime address to function name MD5 hash. // This map is only populated and used by raw instr profile reader. AddrHashMap AddrToMD5Map; + bool Sorted = false; + + static StringRef getExternalSymbol() { + return "** External Symbol **"; + } + + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); public: - InstrProfSymtab() = default; + InstrProfSymtab() = default; /// Create InstrProfSymtab from an object file section which /// contains function PGO names. When section may contain raw @@ -456,21 +467,17 @@ public: /// \p IterRange. This interface is used by IndexedProfReader. template Error create(const NameIterRange &IterRange); - // If the symtab is created by a series of calls to \c addFuncName, \c - // finalizeSymtab needs to be called before looking up function names. - // This is required because the underlying map is a vector (for space - // efficiency) which needs to be sorted. - inline void finalizeSymtab(); - /// Update the symtab by adding \p FuncName to the table. This interface /// is used by the raw and text profile readers. Error addFuncName(StringRef FuncName) { if (FuncName.empty()) return make_error(instrprof_error::malformed); auto Ins = NameTab.insert(FuncName); - if (Ins.second) + if (Ins.second) { MD5NameMap.push_back(std::make_pair( IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + Sorted = false; + } return Error::success(); } @@ -491,6 +498,16 @@ public: /// If not found, return an empty string. inline StringRef getFuncName(uint64_t FuncMD5Hash); + /// Just like getFuncName, except that it will return a non-empty StringRef + /// if the function is external to this symbol table. All such cases + /// will be represented using the same StringRef value. + inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); + + /// True if Symbol is the value used to represent external symbols. + static bool isExternalSymbol(const StringRef &Symbol) { + return Symbol == InstrProfSymtab::getExternalSymbol(); + } + /// Return function from the name's md5 hash. Return nullptr if not found. inline Function *getFunction(uint64_t FuncMD5Hash); @@ -524,14 +541,25 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) { } void InstrProfSymtab::finalizeSymtab() { + if (Sorted) + return; std::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first()); std::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first()); std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), AddrToMD5Map.end()); + Sorted = true; +} + +StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { + StringRef ret = getFuncName(FuncMD5Hash); + if (ret.empty()) + return InstrProfSymtab::getExternalSymbol(); + return ret; } StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + finalizeSymtab(); auto Result = std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, [](const std::pair &LHS, @@ -542,6 +570,7 @@ StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { } Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { + finalizeSymtab(); auto Result = std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, [](const std::pair &LHS, diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index aa58ead1eda..ea0aad04769 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -101,7 +101,7 @@ protected: return make_error(Err); } - Error error(Error E) { return error(InstrProfError::take(std::move(E))); } + Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } /// Clear the current error and return a successful one. Error success() { return error(instrprof_error::success); } diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 8ab5df59f53..7681d5c9464 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -355,7 +355,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { } } } - + Sorted = false; finalizeSymtab(); return Error::success(); } @@ -461,7 +461,6 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { while (P < EndP && *P == 0) P++; } - Symtab.finalizeSymtab(); return Error::success(); } diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 23c9a2676b9..69fc2bd529a 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -200,9 +200,13 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { std::pair VD = Line->rsplit(':'); uint64_t TakenCount, Value; if (ValueKind == IPVK_IndirectCallTarget) { - if (Error E = Symtab->addFuncName(VD.first)) - return E; - Value = IndexedInstrProf::ComputeHash(VD.first); + if (InstrProfSymtab::isExternalSymbol(VD.first)) { + Value = 0; + } else { + if (Error E = Symtab->addFuncName(VD.first)) + return E; + Value = IndexedInstrProf::ComputeHash(VD.first); + } } else { READ_NUM(VD.first, Value); } @@ -227,14 +231,13 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { ++Line; // If we hit EOF while looking for a name, we're done. if (Line.is_at_end()) { - Symtab->finalizeSymtab(); return error(instrprof_error::eof); } // Read the function name. Record.Name = *Line++; if (Error E = Symtab->addFuncName(Record.Name)) - return E; + return error(std::move(E)); // Read the function hash. if (Line.is_at_end()) @@ -265,11 +268,8 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { // Check if value profile data exists and read it if so. if (Error E = readValueProfileData(Record)) - return E; + return error(std::move(E)); - // This is needed to avoid two pass parsing because llvm-profdata - // does dumping while reading. - Symtab->finalizeSymtab(); return success(); } @@ -331,7 +331,6 @@ Error RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { continue; Symtab.mapAddress(FPtr, I->NameRef); } - Symtab.finalizeSymtab(); return success(); } @@ -449,23 +448,23 @@ Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) if (atEnd()) // At this point, ValueDataStart field points to the next header. if (Error E = readNextHeader(getNextHeaderPos())) - return E; + return error(std::move(E)); // Read name ad set it in Record. if (Error E = readName(Record)) - return E; + return error(std::move(E)); // Read FuncHash and set it in Record. if (Error E = readFuncHash(Record)) - return E; + return error(std::move(E)); // Read raw counts and set Record. if (Error E = readRawCounts(Record)) - return E; + return error(std::move(E)); // Read value data and set Record. if (Error E = readValueProfilingData(Record)) - return E; + return error(std::move(E)); // Iterate. advanceData(); diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index ce3f8806e12..33ceb66fd26 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -361,7 +361,8 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, std::unique_ptr VD = Func.getValueForSite(VK, S); for (uint32_t I = 0; I < ND; I++) { if (VK == IPVK_IndirectCallTarget) - OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; + OS << Symtab.getFuncNameOrExternalSymbol(VD[I].Value) << ":" + << VD[I].Count << "\n"; else OS << VD[I].Value << ":" << VD[I].Count << "\n"; } @@ -379,7 +380,6 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (shouldEncodeData(I.getValue())) if (Error E = Symtab.addFuncName(I.getKey())) return E; - Symtab.finalizeSymtab(); for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) diff --git a/test/tools/llvm-profdata/invalid-profdata.test b/test/tools/llvm-profdata/invalid-profdata.test new file mode 100644 index 00000000000..b6391b03464 --- /dev/null +++ b/test/tools/llvm-profdata/invalid-profdata.test @@ -0,0 +1,50 @@ +RUN: echo ":ir" > %t.input +RUN: echo "_ZN6Thread5StartEv" >> %t.input +RUN: echo "# Func Hash:" >> %t.input +RUN: echo "288793635542036872" >> %t.input +RUN: echo "# Num Counters:" >> %t.input +RUN: echo "3" >> %t.input +RUN: echo "# Counter Values:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "# Num Value Kinds:" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "# ValueKind = IPVK_IndirectCallTarget:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "# NumValueSites:" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "f1:10" >> %t.input +RUN: echo "f2:0" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo ":10" >> %t.input + +RUN: not llvm-profdata merge %t.input -text -output=/dev/null 2>&1 | FileCheck %s --check-prefix=BROKEN +BROKEN: Malformed instrumentation profile data + +RUN: echo ":ir" > %t.input +RUN: echo "_ZN6Thread5StartEv" >> %t.input +RUN: echo "# Func Hash:" >> %t.input +RUN: echo "288793635542036872" >> %t.input +RUN: echo "# Num Counters:" >> %t.input +RUN: echo "3" >> %t.input +RUN: echo "# Counter Values:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "12" >> %t.input +RUN: echo "# Num Value Kinds:" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "# ValueKind = IPVK_IndirectCallTarget:" >> %t.input +RUN: echo "0" >> %t.input +RUN: echo "# NumValueSites:" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "2" >> %t.input +RUN: echo "f1:10" >> %t.input +RUN: echo "f2:0" >> %t.input +RUN: echo "1" >> %t.input +RUN: echo "** External Symbol **:10" >> %t.input + +# RUN: llvm-profdata merge %t.input -text -output=%t.out && cat %t.out | FileCheck %s + +CHECK: ** External Symbol **:10 diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp index 79f880e475c..309241dfe6b 100644 --- a/unittests/ProfileData/InstrProfTest.cpp +++ b/unittests/ProfileData/InstrProfTest.cpp @@ -769,7 +769,6 @@ TEST_P(MaybeSparseInstrProfTest, value_prof_data_read_write_mapping) { Symtab.mapAddress(uint64_t(callee3), 0x3000ULL); Symtab.mapAddress(uint64_t(callee4), 0x4000ULL); // Missing mapping for callee5 - Symtab.finalizeSymtab(); VPData->deserializeTo(Record, &Symtab.getAddrHashMap()); @@ -858,8 +857,6 @@ TEST_P(MaybeSparseInstrProfTest, instr_prof_symtab_test) { EXPECT_THAT_ERROR(Symtab.addFuncName("blah_1"), Succeeded()); EXPECT_THAT_ERROR(Symtab.addFuncName("blah_2"), Succeeded()); EXPECT_THAT_ERROR(Symtab.addFuncName("blah_3"), Succeeded()); - // Finalize it - Symtab.finalizeSymtab(); // Check again R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("blah_1"));