| //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements hierarchical time profiler. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Support/TimeProfiler.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/StringMap.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/JSON.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/Process.h" |
| #include "llvm/Support/Threading.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <chrono> |
| #include <mutex> |
| #include <string> |
| #include <vector> |
| |
| using namespace std::chrono; |
| using namespace llvm; |
| |
| static std::mutex Mu; |
| // List of all instances |
| static ManagedStatic<std::vector<TimeTraceProfiler *>> |
| ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu) |
| // Per Thread instance |
| static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; |
| |
| TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { |
| return TimeTraceProfilerInstance; |
| } |
| |
| typedef duration<steady_clock::rep, steady_clock::period> DurationType; |
| typedef time_point<steady_clock> TimePointType; |
| typedef std::pair<size_t, DurationType> CountAndDurationType; |
| typedef std::pair<std::string, CountAndDurationType> |
| NameAndCountAndDurationType; |
| |
| namespace { |
| struct Entry { |
| const TimePointType Start; |
| TimePointType End; |
| const std::string Name; |
| const std::string Detail; |
| |
| Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) |
| : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), |
| Detail(std::move(Dt)) {} |
| |
| // Calculate timings for FlameGraph. Cast time points to microsecond precision |
| // rather than casting duration. This avoid truncation issues causing inner |
| // scopes overruning outer scopes. |
| steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { |
| return (time_point_cast<microseconds>(Start) - |
| time_point_cast<microseconds>(StartTime)) |
| .count(); |
| } |
| |
| steady_clock::rep getFlameGraphDurUs() const { |
| return (time_point_cast<microseconds>(End) - |
| time_point_cast<microseconds>(Start)) |
| .count(); |
| } |
| }; |
| } // namespace |
| |
| struct llvm::TimeTraceProfiler { |
| TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") |
| : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()), |
| ProcName(ProcName), Pid(sys::Process::getProcessId()), |
| Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { |
| llvm::get_thread_name(ThreadName); |
| } |
| |
| void begin(std::string Name, llvm::function_ref<std::string()> Detail) { |
| Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), |
| Detail()); |
| } |
| |
| void end() { |
| assert(!Stack.empty() && "Must call begin() first"); |
| Entry &E = Stack.back(); |
| E.End = steady_clock::now(); |
| |
| // Check that end times monotonically increase. |
| assert((Entries.empty() || |
| (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= |
| Entries.back().getFlameGraphStartUs(StartTime) + |
| Entries.back().getFlameGraphDurUs())) && |
| "TimeProfiler scope ended earlier than previous scope"); |
| |
| // Calculate duration at full precision for overall counts. |
| DurationType Duration = E.End - E.Start; |
| |
| // Only include sections longer or equal to TimeTraceGranularity msec. |
| if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) |
| Entries.emplace_back(E); |
| |
| // Track total time taken by each "name", but only the topmost levels of |
| // them; e.g. if there's a template instantiation that instantiates other |
| // templates from within, we only want to add the topmost one. "topmost" |
| // happens to be the ones that don't have any currently open entries above |
| // itself. |
| if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), |
| [&](const Entry &Val) { return Val.Name == E.Name; })) { |
| auto &CountAndTotal = CountAndTotalPerName[E.Name]; |
| CountAndTotal.first++; |
| CountAndTotal.second += Duration; |
| } |
| |
| Stack.pop_back(); |
| } |
| |
| // Write events from this TimeTraceProfilerInstance and |
| // ThreadTimeTraceProfilerInstances. |
| void write(raw_pwrite_stream &OS) { |
| // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. |
| std::lock_guard<std::mutex> Lock(Mu); |
| assert(Stack.empty() && |
| "All profiler sections should be ended when calling write"); |
| assert(llvm::all_of(*ThreadTimeTraceProfilerInstances, |
| [](const auto &TTP) { return TTP->Stack.empty(); }) && |
| "All profiler sections should be ended when calling write"); |
| |
| json::OStream J(OS); |
| J.objectBegin(); |
| J.attributeBegin("traceEvents"); |
| J.arrayBegin(); |
| |
| // Emit all events for the main flame graph. |
| auto writeEvent = [&](const auto &E, uint64_t Tid) { |
| auto StartUs = E.getFlameGraphStartUs(StartTime); |
| auto DurUs = E.getFlameGraphDurUs(); |
| |
| J.object([&] { |
| J.attribute("pid", Pid); |
| J.attribute("tid", int64_t(Tid)); |
| J.attribute("ph", "X"); |
| J.attribute("ts", StartUs); |
| J.attribute("dur", DurUs); |
| J.attribute("name", E.Name); |
| if (!E.Detail.empty()) { |
| J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); |
| } |
| }); |
| }; |
| for (const Entry &E : Entries) |
| writeEvent(E, this->Tid); |
| for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) |
| for (const Entry &E : TTP->Entries) |
| writeEvent(E, TTP->Tid); |
| |
| // Emit totals by section name as additional "thread" events, sorted from |
| // longest one. |
| // Find highest used thread id. |
| uint64_t MaxTid = this->Tid; |
| for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) |
| MaxTid = std::max(MaxTid, TTP->Tid); |
| |
| // Combine all CountAndTotalPerName from threads into one. |
| StringMap<CountAndDurationType> AllCountAndTotalPerName; |
| auto combineStat = [&](const auto &Stat) { |
| StringRef Key = Stat.getKey(); |
| auto Value = Stat.getValue(); |
| auto &CountAndTotal = AllCountAndTotalPerName[Key]; |
| CountAndTotal.first += Value.first; |
| CountAndTotal.second += Value.second; |
| }; |
| for (const auto &Stat : CountAndTotalPerName) |
| combineStat(Stat); |
| for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) |
| for (const auto &Stat : TTP->CountAndTotalPerName) |
| combineStat(Stat); |
| |
| std::vector<NameAndCountAndDurationType> SortedTotals; |
| SortedTotals.reserve(AllCountAndTotalPerName.size()); |
| for (const auto &Total : AllCountAndTotalPerName) |
| SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); |
| |
| llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, |
| const NameAndCountAndDurationType &B) { |
| return A.second.second > B.second.second; |
| }); |
| |
| // Report totals on separate threads of tracing file. |
| uint64_t TotalTid = MaxTid + 1; |
| for (const NameAndCountAndDurationType &Total : SortedTotals) { |
| auto DurUs = duration_cast<microseconds>(Total.second.second).count(); |
| auto Count = AllCountAndTotalPerName[Total.first].first; |
| |
| J.object([&] { |
| J.attribute("pid", Pid); |
| J.attribute("tid", int64_t(TotalTid)); |
| J.attribute("ph", "X"); |
| J.attribute("ts", 0); |
| J.attribute("dur", DurUs); |
| J.attribute("name", "Total " + Total.first); |
| J.attributeObject("args", [&] { |
| J.attribute("count", int64_t(Count)); |
| J.attribute("avg ms", int64_t(DurUs / Count / 1000)); |
| }); |
| }); |
| |
| ++TotalTid; |
| } |
| |
| auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, |
| StringRef arg) { |
| J.object([&] { |
| J.attribute("cat", ""); |
| J.attribute("pid", Pid); |
| J.attribute("tid", int64_t(Tid)); |
| J.attribute("ts", 0); |
| J.attribute("ph", "M"); |
| J.attribute("name", Name); |
| J.attributeObject("args", [&] { J.attribute("name", arg); }); |
| }); |
| }; |
| |
| writeMetadataEvent("process_name", Tid, ProcName); |
| writeMetadataEvent("thread_name", Tid, ThreadName); |
| for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) |
| writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); |
| |
| J.arrayEnd(); |
| J.attributeEnd(); |
| |
| // Emit the absolute time when this TimeProfiler started. |
| // This can be used to combine the profiling data from |
| // multiple processes and preserve actual time intervals. |
| J.attribute("beginningOfTime", |
| time_point_cast<microseconds>(BeginningOfTime) |
| .time_since_epoch() |
| .count()); |
| |
| J.objectEnd(); |
| } |
| |
| SmallVector<Entry, 16> Stack; |
| SmallVector<Entry, 128> Entries; |
| StringMap<CountAndDurationType> CountAndTotalPerName; |
| const time_point<system_clock> BeginningOfTime; |
| const TimePointType StartTime; |
| const std::string ProcName; |
| const sys::Process::Pid Pid; |
| SmallString<0> ThreadName; |
| const uint64_t Tid; |
| |
| // Minimum time granularity (in microseconds) |
| const unsigned TimeTraceGranularity; |
| }; |
| |
| void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, |
| StringRef ProcName) { |
| assert(TimeTraceProfilerInstance == nullptr && |
| "Profiler should not be initialized"); |
| TimeTraceProfilerInstance = new TimeTraceProfiler( |
| TimeTraceGranularity, llvm::sys::path::filename(ProcName)); |
| } |
| |
| // Removes all TimeTraceProfilerInstances. |
| // Called from main thread. |
| void llvm::timeTraceProfilerCleanup() { |
| delete TimeTraceProfilerInstance; |
| TimeTraceProfilerInstance = nullptr; |
| std::lock_guard<std::mutex> Lock(Mu); |
| for (auto *TTP : *ThreadTimeTraceProfilerInstances) |
| delete TTP; |
| ThreadTimeTraceProfilerInstances->clear(); |
| } |
| |
| // Finish TimeTraceProfilerInstance on a worker thread. |
| // This doesn't remove the instance, just moves the pointer to global vector. |
| void llvm::timeTraceProfilerFinishThread() { |
| std::lock_guard<std::mutex> Lock(Mu); |
| ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance); |
| TimeTraceProfilerInstance = nullptr; |
| } |
| |
| void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { |
| assert(TimeTraceProfilerInstance != nullptr && |
| "Profiler object can't be null"); |
| TimeTraceProfilerInstance->write(OS); |
| } |
| |
| Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, |
| StringRef FallbackFileName) { |
| assert(TimeTraceProfilerInstance != nullptr && |
| "Profiler object can't be null"); |
| |
| std::string Path = PreferredFileName.str(); |
| if (Path.empty()) { |
| Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); |
| Path += ".time-trace"; |
| } |
| |
| std::error_code EC; |
| raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); |
| if (EC) |
| return createStringError(EC, "Could not open " + Path); |
| |
| timeTraceProfilerWrite(OS); |
| return Error::success(); |
| } |
| |
| void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) { |
| if (TimeTraceProfilerInstance != nullptr) |
| TimeTraceProfilerInstance->begin(std::string(Name), |
| [&]() { return std::string(Detail); }); |
| } |
| |
| void llvm::timeTraceProfilerBegin(StringRef Name, |
| llvm::function_ref<std::string()> Detail) { |
| if (TimeTraceProfilerInstance != nullptr) |
| TimeTraceProfilerInstance->begin(std::string(Name), Detail); |
| } |
| |
| void llvm::timeTraceProfilerEnd() { |
| if (TimeTraceProfilerInstance != nullptr) |
| TimeTraceProfilerInstance->end(); |
| } |