1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00
llvm-mirror/lib/Support/TimeProfiler.cpp
Sergej Jaskiewicz f9b2e0fd91 [TimeProfiler] Emit clock synchronization point
Time profiler emits relative timestamps for events (the number of
microseconds passed since the start of the current process).

This patch allows combining events from different processes while
preserving their relative timing by emitting a new attribute
"beginningOfTime". This attribute contains the system time that
corresponds to the zero timestamp of the time profiler.

This has at least two use cases:

- Build systems can use this to merge time traces from multiple compiler
  invocations and generate statistics for the whole build. Tools like
  ClangBuildAnalyzer could also leverage this feature.

- Compilers that use LLVM as their backend by invoking llc/opt in
  a child process. If such a compiler supports generating time traces
  of its own events, it could merge those events with LLVM-specific
  events received from llc/opt, and produce a more complete time trace.

A proof-of-concept script that merges multiple logs that
contain a synchronization point into one log:
https://github.com/broadwaylamb/merge_trace_events

Differential Revision: https://reviews.llvm.org/D78030
2020-04-23 01:09:31 +03:00

331 lines
12 KiB
C++

//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hierarchical time profiler.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/TimeProfiler.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <mutex>
#include <string>
#include <vector>
using namespace std::chrono;
using namespace llvm;
static std::mutex Mu;
// List of all instances
static std::vector<TimeTraceProfiler *>
ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
// Per Thread instance
static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
return TimeTraceProfilerInstance;
}
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
typedef time_point<steady_clock> TimePointType;
typedef std::pair<size_t, DurationType> CountAndDurationType;
typedef std::pair<std::string, CountAndDurationType>
NameAndCountAndDurationType;
namespace {
struct Entry {
const TimePointType Start;
TimePointType End;
const std::string Name;
const std::string Detail;
Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
: Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
Detail(std::move(Dt)) {}
// Calculate timings for FlameGraph. Cast time points to microsecond precision
// rather than casting duration. This avoid truncation issues causing inner
// scopes overruning outer scopes.
steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
return (time_point_cast<microseconds>(Start) -
time_point_cast<microseconds>(StartTime))
.count();
}
steady_clock::rep getFlameGraphDurUs() const {
return (time_point_cast<microseconds>(End) -
time_point_cast<microseconds>(Start))
.count();
}
};
} // namespace
struct llvm::TimeTraceProfiler {
TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
: BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
ProcName(ProcName), Pid(sys::Process::getProcessId()),
Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
llvm::get_thread_name(ThreadName);
}
void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
Detail());
}
void end() {
assert(!Stack.empty() && "Must call begin() first");
Entry &E = Stack.back();
E.End = steady_clock::now();
// Check that end times monotonically increase.
assert((Entries.empty() ||
(E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
Entries.back().getFlameGraphStartUs(StartTime) +
Entries.back().getFlameGraphDurUs())) &&
"TimeProfiler scope ended earlier than previous scope");
// Calculate duration at full precision for overall counts.
DurationType Duration = E.End - E.Start;
// Only include sections longer or equal to TimeTraceGranularity msec.
if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
Entries.emplace_back(E);
// Track total time taken by each "name", but only the topmost levels of
// them; e.g. if there's a template instantiation that instantiates other
// templates from within, we only want to add the topmost one. "topmost"
// happens to be the ones that don't have any currently open entries above
// itself.
if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
return Val.Name == E.Name;
}) == Stack.rend()) {
auto &CountAndTotal = CountAndTotalPerName[E.Name];
CountAndTotal.first++;
CountAndTotal.second += Duration;
}
Stack.pop_back();
}
// Write events from this TimeTraceProfilerInstance and
// ThreadTimeTraceProfilerInstances.
void write(raw_pwrite_stream &OS) {
// Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
std::lock_guard<std::mutex> Lock(Mu);
assert(Stack.empty() &&
"All profiler sections should be ended when calling write");
assert(llvm::all_of(ThreadTimeTraceProfilerInstances,
[](const auto &TTP) { return TTP->Stack.empty(); }) &&
"All profiler sections should be ended when calling write");
json::OStream J(OS);
J.objectBegin();
J.attributeBegin("traceEvents");
J.arrayBegin();
// Emit all events for the main flame graph.
auto writeEvent = [&](const auto &E, uint64_t Tid) {
auto StartUs = E.getFlameGraphStartUs(StartTime);
auto DurUs = E.getFlameGraphDurUs();
J.object([&] {
J.attribute("pid", Pid);
J.attribute("tid", int64_t(Tid));
J.attribute("ph", "X");
J.attribute("ts", StartUs);
J.attribute("dur", DurUs);
J.attribute("name", E.Name);
if (!E.Detail.empty()) {
J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
}
});
};
for (const Entry &E : Entries)
writeEvent(E, this->Tid);
for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
for (const Entry &E : TTP->Entries)
writeEvent(E, TTP->Tid);
// Emit totals by section name as additional "thread" events, sorted from
// longest one.
// Find highest used thread id.
uint64_t MaxTid = this->Tid;
for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
MaxTid = std::max(MaxTid, TTP->Tid);
// Combine all CountAndTotalPerName from threads into one.
StringMap<CountAndDurationType> AllCountAndTotalPerName;
auto combineStat = [&](const auto &Stat) {
StringRef Key = Stat.getKey();
auto Value = Stat.getValue();
auto &CountAndTotal = AllCountAndTotalPerName[Key];
CountAndTotal.first += Value.first;
CountAndTotal.second += Value.second;
};
for (const auto &Stat : CountAndTotalPerName)
combineStat(Stat);
for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
for (const auto &Stat : TTP->CountAndTotalPerName)
combineStat(Stat);
std::vector<NameAndCountAndDurationType> SortedTotals;
SortedTotals.reserve(AllCountAndTotalPerName.size());
for (const auto &Total : AllCountAndTotalPerName)
SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
const NameAndCountAndDurationType &B) {
return A.second.second > B.second.second;
});
// Report totals on separate threads of tracing file.
uint64_t TotalTid = MaxTid + 1;
for (const NameAndCountAndDurationType &Total : SortedTotals) {
auto DurUs = duration_cast<microseconds>(Total.second.second).count();
auto Count = AllCountAndTotalPerName[Total.first].first;
J.object([&] {
J.attribute("pid", Pid);
J.attribute("tid", int64_t(TotalTid));
J.attribute("ph", "X");
J.attribute("ts", 0);
J.attribute("dur", DurUs);
J.attribute("name", "Total " + Total.first);
J.attributeObject("args", [&] {
J.attribute("count", int64_t(Count));
J.attribute("avg ms", int64_t(DurUs / Count / 1000));
});
});
++TotalTid;
}
auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
StringRef arg) {
J.object([&] {
J.attribute("cat", "");
J.attribute("pid", Pid);
J.attribute("tid", int64_t(Tid));
J.attribute("ts", 0);
J.attribute("ph", "M");
J.attribute("name", Name);
J.attributeObject("args", [&] { J.attribute("name", arg); });
});
};
writeMetadataEvent("process_name", Tid, ProcName);
writeMetadataEvent("thread_name", Tid, ThreadName);
for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
J.arrayEnd();
J.attributeEnd();
// Emit the absolute time when this TimeProfiler started.
// This can be used to combine the profiling data from
// multiple processes and preserve actual time intervals.
J.attribute("beginningOfTime",
time_point_cast<microseconds>(BeginningOfTime)
.time_since_epoch()
.count());
J.objectEnd();
}
SmallVector<Entry, 16> Stack;
SmallVector<Entry, 128> Entries;
StringMap<CountAndDurationType> CountAndTotalPerName;
const time_point<system_clock> BeginningOfTime;
const TimePointType StartTime;
const std::string ProcName;
const sys::Process::Pid Pid;
SmallString<0> ThreadName;
const uint64_t Tid;
// Minimum time granularity (in microseconds)
const unsigned TimeTraceGranularity;
};
void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
StringRef ProcName) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
TimeTraceProfilerInstance = new TimeTraceProfiler(
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
}
// Removes all TimeTraceProfilerInstances.
// Called from main thread.
void llvm::timeTraceProfilerCleanup() {
delete TimeTraceProfilerInstance;
std::lock_guard<std::mutex> Lock(Mu);
for (auto TTP : ThreadTimeTraceProfilerInstances)
delete TTP;
ThreadTimeTraceProfilerInstances.clear();
}
// Finish TimeTraceProfilerInstance on a worker thread.
// This doesn't remove the instance, just moves the pointer to global vector.
void llvm::timeTraceProfilerFinishThread() {
std::lock_guard<std::mutex> Lock(Mu);
ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
TimeTraceProfilerInstance = nullptr;
}
void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
assert(TimeTraceProfilerInstance != nullptr &&
"Profiler object can't be null");
TimeTraceProfilerInstance->write(OS);
}
Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
StringRef FallbackFileName) {
assert(TimeTraceProfilerInstance != nullptr &&
"Profiler object can't be null");
std::string Path = PreferredFileName.str();
if (Path.empty()) {
Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
Path += ".time-trace";
}
std::error_code EC;
raw_fd_ostream OS(Path, EC, sys::fs::OF_Text);
if (EC)
return createStringError(EC, "Could not open " + Path);
timeTraceProfilerWrite(OS);
return Error::success();
}
void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
if (TimeTraceProfilerInstance != nullptr)
TimeTraceProfilerInstance->begin(std::string(Name),
[&]() { return std::string(Detail); });
}
void llvm::timeTraceProfilerBegin(StringRef Name,
llvm::function_ref<std::string()> Detail) {
if (TimeTraceProfilerInstance != nullptr)
TimeTraceProfilerInstance->begin(std::string(Name), Detail);
}
void llvm::timeTraceProfilerEnd() {
if (TimeTraceProfilerInstance != nullptr)
TimeTraceProfilerInstance->end();
}