//===- Profile.cpp - XRay Profile Abstraction -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Defines the XRay Profile class representing the latency profile generated by // XRay's profiling mode. // //===----------------------------------------------------------------------===// #include "llvm/XRay/Profile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/XRay/Trace.h" #include #include namespace llvm { namespace xray { Profile::Profile(const Profile &O) { // We need to re-create all the tries from the original (O), into the current // Profile being initialized, through the Block instances we see. for (const auto &Block : O) { Blocks.push_back({Block.Thread, {}}); auto &B = Blocks.back(); for (const auto &PathData : Block.PathData) B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))), PathData.second}); } } Profile &Profile::operator=(const Profile &O) { Profile P = O; *this = std::move(P); return *this; } namespace { struct BlockHeader { uint32_t Size; uint32_t Number; uint64_t Thread; }; static Expected readBlockHeader(DataExtractor &Extractor, uint32_t &Offset) { BlockHeader H; uint32_t CurrentOffset = Offset; H.Size = Extractor.getU32(&Offset); if (Offset == CurrentOffset) return make_error( Twine("Error parsing block header size at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); CurrentOffset = Offset; H.Number = Extractor.getU32(&Offset); if (Offset == CurrentOffset) return make_error( Twine("Error parsing block header number at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); CurrentOffset = Offset; H.Thread = Extractor.getU64(&Offset); if (Offset == CurrentOffset) return make_error( Twine("Error parsing block header thread id at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); return H; } static Expected> readPath(DataExtractor &Extractor, uint32_t &Offset) { // We're reading a sequence of int32_t's until we find a 0. std::vector Path; auto CurrentOffset = Offset; int32_t FuncId; do { FuncId = Extractor.getSigned(&Offset, 4); if (CurrentOffset == Offset) return make_error( Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); CurrentOffset = Offset; Path.push_back(FuncId); } while (FuncId != 0); return std::move(Path); } static Expected readData(DataExtractor &Extractor, uint32_t &Offset) { // We expect a certain number of elements for Data: // - A 64-bit CallCount // - A 64-bit CumulativeLocalTime counter Profile::Data D; auto CurrentOffset = Offset; D.CallCount = Extractor.getU64(&Offset); if (CurrentOffset == Offset) return make_error( Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); CurrentOffset = Offset; D.CumulativeLocalTime = Extractor.getU64(&Offset); if (CurrentOffset == Offset) return make_error( Twine("Error parsing cumulative local time at offset '") + Twine(CurrentOffset) + "'", std::make_error_code(std::errc::invalid_argument)); return D; } } // namespace Error Profile::addBlock(Block &&B) { if (B.PathData.empty()) return make_error( "Block may not have empty path data.", std::make_error_code(std::errc::invalid_argument)); Blocks.emplace_back(std::move(B)); return Error::success(); } Expected> Profile::expandPath(PathID P) const { auto It = PathIDMap.find(P); if (It == PathIDMap.end()) return make_error( Twine("PathID not found: ") + Twine(P), std::make_error_code(std::errc::invalid_argument)); std::vector Path; for (auto Node = It->second; Node; Node = Node->Caller) Path.push_back(Node->Func); return std::move(Path); } Profile::PathID Profile::internPath(ArrayRef P) { if (P.empty()) return 0; auto RootToLeafPath = reverse(P); // Find the root. auto It = RootToLeafPath.begin(); auto PathRoot = *It++; auto RootIt = find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); // If we've not seen this root before, remember it. TrieNode *Node = nullptr; if (RootIt == Roots.end()) { NodeStorage.emplace_back(); Node = &NodeStorage.back(); Node->Func = PathRoot; Roots.push_back(Node); } else { Node = *RootIt; } // Now traverse the path, re-creating if necessary. while (It != RootToLeafPath.end()) { auto NodeFuncID = *It++; auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { return N->Func == NodeFuncID; }); if (CalleeIt == Node->Callees.end()) { NodeStorage.emplace_back(); auto NewNode = &NodeStorage.back(); NewNode->Func = NodeFuncID; NewNode->Caller = Node; Node->Callees.push_back(NewNode); Node = NewNode; } else { Node = *CalleeIt; } } // At this point, Node *must* be pointing at the leaf. assert(Node->Func == P.front()); if (Node->ID == 0) { Node->ID = NextID++; PathIDMap.insert({Node->ID, Node}); } return Node->ID; } Profile mergeProfilesByThread(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap; using PathDataMapPtr = std::unique_ptr; using PathDataVector = decltype(Profile::Block::PathData); using ThreadProfileIndexMap = DenseMap; ThreadProfileIndexMap ThreadProfileIndex; for (const auto &P : {std::ref(L), std::ref(R)}) for (const auto &Block : P.get()) { ThreadProfileIndexMap::iterator It; std::tie(It, std::ignore) = ThreadProfileIndex.insert( {Block.Thread, PathDataMapPtr{new PathDataMap()}}); for (const auto &PathAndData : Block.PathData) { auto &PathID = PathAndData.first; auto &Data = PathAndData.second; auto NewPathID = Merged.internPath(cantFail(P.get().expandPath(PathID))); PathDataMap::iterator PathDataIt; bool Inserted; std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); if (!Inserted) { auto &ExistingData = PathDataIt->second; ExistingData.CallCount += Data.CallCount; ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; } } } for (const auto &IndexedThreadBlock : ThreadProfileIndex) { PathDataVector PathAndData; PathAndData.reserve(IndexedThreadBlock.second->size()); copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); cantFail( Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); } return Merged; } Profile mergeProfilesByStack(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap; PathDataMap PathData; using PathDataVector = decltype(Profile::Block::PathData); for (const auto &P : {std::ref(L), std::ref(R)}) for (const auto &Block : P.get()) for (const auto &PathAndData : Block.PathData) { auto &PathId = PathAndData.first; auto &Data = PathAndData.second; auto NewPathID = Merged.internPath(cantFail(P.get().expandPath(PathId))); PathDataMap::iterator PathDataIt; bool Inserted; std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); if (!Inserted) { auto &ExistingData = PathDataIt->second; ExistingData.CallCount += Data.CallCount; ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; } } // In the end there's a single Block, for thread 0. PathDataVector Block; Block.reserve(PathData.size()); copy(PathData, std::back_inserter(Block)); cantFail(Merged.addBlock({0, std::move(Block)})); return Merged; } Expected loadProfile(StringRef Filename) { int Fd; if (auto EC = sys::fs::openFileForRead(Filename, Fd)) return make_error( Twine("Cannot read profile from '") + Filename + "'", EC); uint64_t FileSize; if (auto EC = sys::fs::file_size(Filename, FileSize)) return make_error( Twine("Cannot get filesize of '") + Filename + "'", EC); std::error_code EC; sys::fs::mapped_file_region MappedFile( Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); if (EC) return make_error( Twine("Cannot mmap profile '") + Filename + "'", EC); StringRef Data(MappedFile.data(), MappedFile.size()); Profile P; uint32_t Offset = 0; DataExtractor Extractor(Data, true, 8); // For each block we get from the file: while (Offset != MappedFile.size()) { auto HeaderOrError = readBlockHeader(Extractor, Offset); if (!HeaderOrError) return HeaderOrError.takeError(); // TODO: Maybe store this header information for each block, even just for // debugging? const auto &Header = HeaderOrError.get(); // Read in the path data. auto PathOrError = readPath(Extractor, Offset); if (!PathOrError) return PathOrError.takeError(); const auto &Path = PathOrError.get(); // For each path we encounter, we should intern it to get a PathID. auto DataOrError = readData(Extractor, Offset); if (!DataOrError) return DataOrError.takeError(); auto &Data = DataOrError.get(); if (auto E = P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, {{P.internPath(Path), std::move(Data)}}})) return std::move(E); } return P; } namespace { struct StackEntry { uint64_t Timestamp; Profile::FuncID FuncId; }; } // namespace Expected profileFromTrace(const Trace &T) { Profile P; // The implementation of the algorithm re-creates the execution of // the functions based on the trace data. To do this, we set up a number of // data structures to track the execution context of every thread in the // Trace. DenseMap> ThreadStacks; DenseMap> ThreadPathData; // We then do a pass through the Trace to account data on a per-thread-basis. for (const auto &E : T) { auto &TSD = ThreadStacks[E.TId]; switch (E.Type) { case RecordTypes::ENTER: case RecordTypes::ENTER_ARG: // Push entries into the function call stack. TSD.push_back({E.TSC, E.FuncId}); break; case RecordTypes::EXIT: case RecordTypes::TAIL_EXIT: // Exits cause some accounting to happen, based on the state of the stack. // For each function we pop off the stack, we take note of the path and // record the cumulative state for this path. As we're doing this, we // intern the path into the Profile. while (!TSD.empty()) { auto Top = TSD.back(); auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); SmallVector Path; transform(reverse(TSD), std::back_inserter(Path), std::mem_fn(&StackEntry::FuncId)); auto InternedPath = P.internPath(Path); auto &TPD = ThreadPathData[E.TId][InternedPath]; ++TPD.CallCount; TPD.CumulativeLocalTime += FunctionLocalTime; TSD.pop_back(); // If we've matched the corresponding entry event for this function, // then we exit the loop. if (Top.FuncId == E.FuncId) break; // FIXME: Consider the intermediate times and the cumulative tree time // as well. } break; case RecordTypes::CUSTOM_EVENT: case RecordTypes::TYPED_EVENT: // TODO: Support an extension point to allow handling of custom and typed // events in profiles. break; } } // Once we've gone through the Trace, we now create one Block per thread in // the Profile. for (const auto &ThreadPaths : ThreadPathData) { const auto &TID = ThreadPaths.first; const auto &PathsData = ThreadPaths.second; if (auto E = P.addBlock({ TID, std::vector>( PathsData.begin(), PathsData.end()), })) return std::move(E); } return P; } } // namespace xray } // namespace llvm