| //===-cPerf.cpp - Linux perf.data parsing ---------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This is a python module that reads perf.data files generated by Linux Perf. |
| // Writing efficiently-executing Python that reads and manipulates binary files |
| // is difficult and results in code that looks very un-Python-like. |
| // |
| // Also, parsing profiles is on the critical path for result submission and |
| // often has to run on target devices that may not be as fast as a desktop |
| // machine (an example is a pandaboard - people still test on Cortex-A9). |
| // |
| // This algorithm aims to be faster than 'perf report' or 'perf annotate'. |
| // In experiments this module is around 6x faster than 'perf annotate' - more |
| // when we reduce the amount of data imported (see later). |
| // |
| // Algorithm |
| // --------- |
| // |
| // We start by mmapping the perf.data file and reading the header, event |
| // attribute entries and the sample data stream. Every sample is aggregated into |
| // a set of counters counting every event for every PC location (indexed by mmap |
| // ID - a data stream can contain samples for the same PC in different binaries |
| // e.g. if exec() is called). Total counters are also kept for the entire stream |
| // and per-mmap. |
| // |
| // The source for the perf.data format is here: https://lwn.net/Articles/644919/ |
| // and the perf_events manpage. |
| // |
| // Once the aggregation is done, we do a single pass through the event data: |
| // |
| // for each mmap: |
| // if the mmap's event total is < 1% of the total in all counters, |
| // then discard the mmap [1]. |
| // |
| // load symbol data by calling "objdump -t" and parsing the result. |
| // for all PCs we have events for, in sorted order: |
| // find the symbol this PC is part of -> Sym |
| // look up all PCs between Sym.Start and Sym.End and emit data |
| // |
| // The output of this module (cPerf.importPerf) is a dictionary in (almost) |
| // ProfileV1 form (see profile.py and profilev1impl.py). The only difference is |
| // that all counters are absolute. |
| // |
| // [1]: Perf will start sampling from the moment the perf wrapper tool is |
| // invoked, and its samples will continue until the perf wrapper tool exits. |
| // This means that it will often take one or two samples in intermediate |
| // binaries like "perf", "bash", or libraries such as libdl. We don't care about |
| // these, and these binaries and libraries are very large to objdump. |
| // |
| // So we have a threshold - if a binary contains < 1% of all samples, don't |
| // bother importing it. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifdef _WIN32 |
| #define WIN32_LEAN_AND_MEAN |
| #define _CRT_SECURE_NO_WARNINGS |
| #define strtok_r strtok_s |
| #include <windows.h> |
| #include <BaseTsd.h> |
| typedef SSIZE_T ssize_t; |
| #define PROT_EXEC 4 |
| #endif |
| #include <Python.h> |
| #include <algorithm> |
| #include <cassert> |
| #include <cstring> |
| #include <exception> |
| #include <fcntl.h> |
| #include <iostream> |
| #include <map> |
| #include <sstream> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #ifndef _WIN32 |
| #include <sys/mman.h> |
| #include <sys/wait.h> |
| #include <unistd.h> |
| #endif |
| #include <sys/stat.h> |
| #include <vector> |
| |
| //===----------------------------------------------------------------------===// |
| // Helpers |
| //===----------------------------------------------------------------------===// |
| |
#if !defined(STANDALONE)

// Inside the Python extension a failed assertion must not abort() the whole
// interpreter, so assert() is rerouted to Assert(), which throws instead.
#undef assert
#define assert(expr) Assert((expr), #expr, __FILE__, __LINE__)

#endif // !STANDALONE
| |
// Remove a uint32_t from a byte stream.
//
// Reads four bytes in host byte order from Buf and advances Buf past them.
// The bytes are copied with memcpy instead of dereferencing a casted pointer,
// so the read is well defined even when Buf is not 4-byte aligned.
uint32_t TakeU32(unsigned char *&Buf) {
  uint32_t X;
  std::memcpy(&X, Buf, sizeof(X));
  Buf += sizeof(X);
  return X;
}
| |
// Remove a uint64_t from a byte stream.
//
// Reads eight bytes in host byte order from Buf and advances Buf past them.
// The bytes are copied with memcpy instead of dereferencing a casted pointer,
// so the read is well defined even when Buf is not 8-byte aligned.
uint64_t TakeU64(unsigned char *&Buf) {
  uint64_t X;
  std::memcpy(&X, Buf, sizeof(X));
  Buf += sizeof(X);
  return X;
}
| |
// Runs Cmd under a shell ("/bin/sh -c" on POSIX, "cmd.exe /c" on Windows) and
// returns a stdio stream from which the command's stdout can be read.
//
// On POSIX the caller is expected to fclose() the stream and then reap the
// child with wait(); on Windows the stream is closed with _pclose().
//
// Throws std::runtime_error when the pipe, the stream or the child process
// cannot be created, so a null stream is never handed back to the caller.
FILE *ForkAndExec(std::string Cmd) {
#ifdef _WIN32
  Cmd = "cmd.exe /c " + Cmd;
  FILE *Stream = _popen(Cmd.c_str(), "rt");
  if (!Stream)
    throw std::runtime_error("Unable to run command: " + Cmd);
#else
  int P[2];
  if (pipe(P) != 0)
    throw std::runtime_error("pipe() failed for command: " + Cmd);

  FILE *Stream = fdopen(P[0], "r");
  if (!Stream) {
    close(P[0]);
    close(P[1]);
    throw std::runtime_error("fdopen() failed for command: " + Cmd);
  }

  pid_t Child = fork();
  if (Child == 0) {
    // Child: make the pipe's write end our stdout, then become the shell.
    if (dup2(P[1], 1) < 0)
      _exit(127);
    close(P[0]);
    close(P[1]);
    execl("/bin/sh", "sh", "-c", Cmd.c_str(), (char *)NULL);
    // Only reached when execl failed. Never fall back into the parent's code
    // (which may be a Python interpreter) from the forked child.
    _exit(127);
  }

  // Parent: the write end belongs to the child only.
  close(P[1]);
  if (Child < 0) {
    fclose(Stream);
    throw std::runtime_error("fork() failed for command: " + Cmd);
  }
#endif
  return Stream;
}
| |
#ifdef _WIN32
// Stand-in for POSIX getline() on Windows.
//
// Reads one line (including the trailing '\n', if any) from stream into
// *lineptr, growing the malloc'ed buffer in 128-byte steps as needed and
// recording its capacity in *n. Returns the number of characters stored, or
// -1 on a null argument, on end of file before any character was read, or on
// allocation failure.
ssize_t getline(char** lineptr, size_t* n, FILE* stream) {
  if (!lineptr || !stream || !n)
    return -1;

  int ch = fgetc(stream);
  if (ch == EOF)
    return -1;

  char* buffer = *lineptr;
  size_t capacity = *n;
  if (!buffer) {
    capacity = 128;
    buffer = (char*)malloc(capacity);
    if (!buffer)
      return -1;
    *lineptr = buffer;
  }

  size_t length = 0;
  for (; ch != EOF; ch = fgetc(stream)) {
    // Keep room for this character plus the terminating NUL.
    if (length + 2 > capacity) {
      capacity += 128;
      buffer = (char*)realloc(buffer, capacity);
      if (!buffer)
        return -1;
      *lineptr = buffer;
    }
    buffer[length++] = (char)ch;
    if (ch == '\n')
      break;
  }
  buffer[length] = '\0';
  *n = capacity;
  return (ssize_t)length;
}
#endif
| |
// Throwing replacement for a failed assertion.
//
// Does nothing when Expr is true. Otherwise throws std::logic_error whose
// message is "<ExprStr> (<File>:<Line>)", truncated to fit a 512-byte buffer.
void Assert(bool Expr, const char *ExprStr, const char *File, int Line) {
  if (Expr)
    return;
  char Str[512];
  // snprintf truncates an over-long message instead of overrunning Str.
  snprintf(Str, sizeof(Str), "%s (%s:%d)", ExprStr, File, Line);
  throw std::logic_error(Str);
}
| |
| //===----------------------------------------------------------------------===// |
| // Perf structures. Taken from https://lwn.net/Articles/644919/ |
| //===----------------------------------------------------------------------===// |
| |
// Record kinds (perf_event_header.type) that this reader acts upon.
#define PERF_RECORD_MMAP   1
#define PERF_RECORD_SAMPLE 9
#define PERF_RECORD_MMAP2  10

// Bits of the sample layout mask (perf_event_attr.sample_type). Each bit
// states that the corresponding field is present in a sample record.
#define PERF_SAMPLE_IP         (1U << 0)
#define PERF_SAMPLE_TID        (1U << 1)
#define PERF_SAMPLE_TIME       (1U << 2)
#define PERF_SAMPLE_ADDR       (1U << 3)
#define PERF_SAMPLE_ID         (1U << 6)
#define PERF_SAMPLE_CPU        (1U << 7)
#define PERF_SAMPLE_PERIOD     (1U << 8)
#define PERF_SAMPLE_STREAM_ID  (1U << 9)
#define PERF_SAMPLE_IDENTIFIER (1U << 16)
| |
// Location of one section inside the perf.data file.
struct perf_file_section {
  uint64_t offset; // Offset from the start of the file.
  uint64_t size;   // Size of the section in bytes.
};

// Fixed header found at the very start of a perf.data file.
struct perf_header {
  char magic[8];                 // "PERFILE2"
  uint64_t size;                 // Size of the header.
  uint64_t attr_size;            // Size of one attribute entry in attrs.
  perf_file_section attrs;       // Event attribute entries.
  perf_file_section data;        // Event data stream.
  perf_file_section event_types;
  uint64_t flags;                // Feature bits; see HEADER_EVENT_DESC.
  uint64_t flags1[3];
};
| |
// Leading part of an event attribute entry. Only type, size, config and
// sample_type are read by this file; the remaining members keep the layout.
struct perf_event_attr {
  uint32_t type;          // One of perf_type_id.
  uint32_t size;          // Size of this attribute structure as written.
  uint64_t config;        // Event selector (e.g. perf_hw_id / perf_sw_ids).
  uint64_t sample_period;
  uint64_t sample_type;   // Mask of PERF_SAMPLE_* bits.
  uint64_t read_format;
  uint64_t flags;
  uint32_t wakeup_events;
  uint32_t bp_type;
  uint64_t bp_addr;
  uint64_t bp_len;
  uint64_t branch_sample_type;
};
| |
// Header that starts every record in the data stream.
struct perf_event_header {
  uint32_t type; // PERF_RECORD_*
  uint16_t misc;
  uint16_t size; // Total record size in bytes; used to step to the next one.
};

// Decoded form of a sample record, as filled in by PerfReader::parseEvent.
struct perf_event_sample {
  perf_event_header header;

  uint64_t sample_id;
  uint64_t ip;
  uint32_t pid;
  uint32_t tid;
  uint64_t time;
  uint64_t id;
  uint64_t period;
};

// Fields shared by the MMAP and MMAP2 records.
struct perf_event_mmap_common {
  perf_event_header header;

  uint32_t pid;
  uint32_t tid;
  uint64_t start;
  uint64_t extent;
  uint64_t pgoff;
};

// PERF_RECORD_MMAP: the NUL-terminated file name starts at filename.
struct perf_event_mmap {
  perf_event_mmap_common mmap_common;
  char filename[1];
};

// PERF_RECORD_MMAP2: like MMAP plus device, inode and protection data.
struct perf_event_mmap2 {
  perf_event_mmap_common mmap_common;
  uint32_t maj;
  uint32_t min;
  uint64_t ino;
  uint64_t ino_generation;
  uint32_t prot;
  uint32_t flags;
  char filename[1];
};
| |
// An event id paired with a fixed-size, 64-byte name field.
struct perf_trace_event_type {
  uint64_t event_id;
  char str[64];
};
| |
// Values of perf_event_attr.type.
enum perf_type_id {
  PERF_TYPE_HARDWARE   = 0,
  PERF_TYPE_SOFTWARE   = 1,
  PERF_TYPE_TRACEPOINT = 2,
  PERF_TYPE_HW_CACHE   = 3,
  PERF_TYPE_RAW        = 4,
  PERF_TYPE_BREAKPOINT = 5,
  PERF_TYPE_MAX // Number of types listed above.
};
| |
// Values of perf_event_attr.config for PERF_TYPE_HARDWARE events.
enum perf_hw_id {
  PERF_COUNT_HW_CPU_CYCLES              = 0,
  PERF_COUNT_HW_INSTRUCTIONS            = 1,
  PERF_COUNT_HW_CACHE_REFERENCES        = 2,
  PERF_COUNT_HW_CACHE_MISSES            = 3,
  PERF_COUNT_HW_BRANCH_INSTRUCTIONS     = 4,
  PERF_COUNT_HW_BRANCH_MISSES           = 5,
  PERF_COUNT_HW_BUS_CYCLES              = 6,
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
  PERF_COUNT_HW_STALLED_CYCLES_BACKEND  = 8,
  PERF_COUNT_HW_REF_CPU_CYCLES          = 9,
  PERF_COUNT_HW_MAX // Number of hardware events listed above.
};

// Counter name for each perf_hw_id value, indexed by that value.
static const char* hw_event_names[PERF_COUNT_HW_MAX] = {
  "cycles",                  // PERF_COUNT_HW_CPU_CYCLES
  "instructions",            // PERF_COUNT_HW_INSTRUCTIONS
  "cache-references",        // PERF_COUNT_HW_CACHE_REFERENCES
  "cache-misses",            // PERF_COUNT_HW_CACHE_MISSES
  "branch-instructions",     // PERF_COUNT_HW_BRANCH_INSTRUCTIONS
  "branch-misses",           // PERF_COUNT_HW_BRANCH_MISSES
  "bus-cycles",              // PERF_COUNT_HW_BUS_CYCLES
  "stalled-cycles-frontend", // PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
  "stalled-cycles-backend",  // PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  "ref-cpu-cycles"           // PERF_COUNT_HW_REF_CPU_CYCLES
};
| |
// Values of perf_event_attr.config for PERF_TYPE_SOFTWARE events.
enum perf_sw_ids {
  PERF_COUNT_SW_CPU_CLOCK        = 0,
  PERF_COUNT_SW_TASK_CLOCK       = 1,
  PERF_COUNT_SW_PAGE_FAULTS      = 2,
  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
  PERF_COUNT_SW_CPU_MIGRATIONS   = 4,
  PERF_COUNT_SW_PAGE_FAULTS_MIN  = 5,
  PERF_COUNT_SW_PAGE_FAULTS_MAJ  = 6,
  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  PERF_COUNT_SW_EMULATION_FAULTS = 8,
  PERF_COUNT_SW_MAX // Number of software events listed above.
};

// Counter name for each perf_sw_ids value, indexed by that value.
static const char* sw_event_names[PERF_COUNT_SW_MAX] = {
  "cpu-clock",        // PERF_COUNT_SW_CPU_CLOCK
  "task-clock",       // PERF_COUNT_SW_TASK_CLOCK
  "page-faults",      // PERF_COUNT_SW_PAGE_FAULTS
  "context-switches", // PERF_COUNT_SW_CONTEXT_SWITCHES
  "cpu-migrations",   // PERF_COUNT_SW_CPU_MIGRATIONS
  "minor-faults",     // PERF_COUNT_SW_PAGE_FAULTS_MIN
  "major-faults",     // PERF_COUNT_SW_PAGE_FAULTS_MAJ
  "alignment-faults", // PERF_COUNT_SW_ALIGNMENT_FAULTS
  "emulation-faults"  // PERF_COUNT_SW_EMULATION_FAULTS
};
| |
| //===----------------------------------------------------------------------===// |
| // Readers for objdump output |
| //===----------------------------------------------------------------------===// |
| |
// One memory mapping of a binary, as announced by an MMAP/MMAP2 record.
struct Map {
  // Both address adjustments start at zero so that copying a freshly built
  // Map never reads an indeterminate value; they are filled in later.
  Map(uint64_t Start, uint64_t End, const char *Filename)
      : Start(Start), End(End), Filename(Filename), FileToPCOffset(0),
        VAddrToFileOffset(0) {}

  uint64_t Start, End;  // Address range covered by the mapping.
  const char *Filename; // Not owned; must outlive the Map.

  // Mapping-related adjustments. Here FileOffset(func) is the offset of func
  // in the ELF file, VAddr(func) is the virtual address associated with this
  // symbol (in case of executable and shared object ELF files, st_value field
  // of a symbol table's entry is symbol's virtual address) and &func is the
  // actual memory address after relocations took place in the address space of
  // the process being profiled.
  uint64_t FileToPCOffset;    // FileOffset(func) + FileToPCOffset == &func
  uint64_t VAddrToFileOffset; // VAddr(func) + VAddrToFileOffset == FileOffset(func)
};
| |
// Address range of a mapping together with the index of its Map entry.
struct EventDesc {
  uint64_t Start; // First address of the mapping.
  uint64_t End;   // End address of the mapping.
  size_t MapId;   // Index into the list of maps.
};
| |
// A function symbol: the address range [Start, End) and its name.
struct Symbol {
  uint64_t Start;
  uint64_t End;
  std::string Name;

  // Ordering considers the start address only.
  bool operator<(const Symbol &RHS) const { return Start < RHS.Start; }

  // Equality requires all three fields to match.
  bool operator==(const Symbol &RHS) const {
    if (Start != RHS.Start)
      return false;
    if (End != RHS.End)
      return false;
    return Name == RHS.Name;
  }
};
| |
| class SymTabOutput : public std::vector<Symbol> { |
| public: |
| std::string Objdump, BinaryCacheRoot; |
| |
| SymTabOutput(std::string Objdump, std::string BinaryCacheRoot) |
| : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) {} |
| |
| void fetchExecSegment(Map *M, uint64_t *FileOffset, uint64_t *VAddr) { |
| std::string Cmd = Objdump + " -p -C " + |
| BinaryCacheRoot + std::string(M->Filename) + |
| #ifdef _WIN32 |
| " 2> NUL"; |
| #else |
| " 2>/dev/null"; |
| #endif |
| auto Stream = ForkAndExec(Cmd); |
| |
| char *Line = nullptr, *PrevLine = nullptr; |
| size_t LineLen = 0; |
| *FileOffset = *VAddr = 0; |
| while (true) { |
| if (PrevLine) |
| free (PrevLine); |
| if (Line) |
| PrevLine = strdup (Line); |
| ssize_t Len = getline(&Line, &LineLen, Stream); |
| if (Len == -1) |
| break; |
| |
| if (!strstr(Line, "flags r-x") && !strstr(Line, "flags rwx")) |
| continue; |
| |
| /* Format is weird.. but we did find the section so punt. */ |
| const char *OFFSET_LABEL = "off "; |
| const char *VADDR_LABEL = "vaddr "; |
| char *pos_offset = strstr(PrevLine, OFFSET_LABEL); |
| char *pos_vaddr = strstr(PrevLine, VADDR_LABEL); |
| if (!pos_offset || !pos_vaddr) |
| break; |
| |
| pos_offset += strlen(OFFSET_LABEL); |
| pos_vaddr += strlen(VADDR_LABEL); |
| *FileOffset = strtoull(pos_offset, NULL, 16); |
| *VAddr = strtoull(pos_vaddr, NULL, 16); |
| |
| break; |
| } |
| if (Line) |
| free(Line); |
| if (PrevLine) |
| free(PrevLine); |
| |
| #ifdef _WIN32 |
| _pclose(Stream); |
| #else |
| fclose(Stream); |
| wait(NULL); |
| #endif |
| } |
| |
| void fetchSymbols(Map *M) { |
| std::string Cmd = Objdump + " -t -T -C " + |
| BinaryCacheRoot + std::string(M->Filename) + |
| #ifdef _WIN32 |
| " 2> NUL"; |
| #else |
| " 2>/dev/null"; |
| #endif |
| auto Stream = ForkAndExec(Cmd); |
| |
| char *Line = nullptr; |
| size_t LineLen = 0; |
| while (true) { |
| ssize_t Len = getline(&Line, &LineLen, Stream); |
| if (Len == -1) |
| break; |
| |
| std::stringstream SS(Line); |
| |
| std::string Start; |
| if (!std::getline(SS, Start, ' ')) |
| continue; |
| char* EndPtr = NULL; |
| uint64_t NStart = strtoull(Start.c_str(), &EndPtr, 16); |
| if (EndPtr == Start.c_str()) |
| continue; |
| |
| char GlobLoc; // Local -> 'l', Global -> 'g', Neither -> ' ' |
| if (!SS.get(GlobLoc)) |
| continue; |
| char Weak; // Weak? |
| if (!SS.get(Weak)) |
| continue; |
| char Space; |
| if (!SS.get(Space)) // Constructor. Not supported yet. |
| continue; |
| if (!SS.get(Space)) // Warning. Not supported yet. |
| continue; |
| char IFunc; // Indirect reference to another symbol. |
| if (!SS.get(IFunc)) |
| continue; |
| char Debug; // Debugging (d) or dynamic (D) symbol. |
| if (!SS.get(Debug)) |
| continue; |
| char FileFunc; // Name of function (F), file (f) or object (O). |
| if (!SS.get(FileFunc)) |
| continue; |
| if (FileFunc != 'F') |
| continue; |
| if (!SS.get(Space)) |
| continue; |
| |
| std::string Section; |
| if (!std::getline(SS, Section, '\t')) |
| continue; |
| if (Section != ".text") |
| continue; |
| |
| std::string Extent; |
| if (!std::getline(SS, Extent, ' ')) |
| continue; |
| uint64_t NExtent = strtoull(Extent.c_str(), &EndPtr, 16); |
| if (EndPtr == Extent.c_str()) |
| continue; |
| |
| std::string Func; |
| while (std::getline(SS, Func, ' ') && Func.empty()); // Skip spaces. |
| if (Func.empty()) |
| continue; |
| // Note Func includes the symbol table visibility if any. |
| std::string FuncRest; |
| if (std::getline(SS, FuncRest)) // Read the rest line if any. |
| Func += std::string(" ") + FuncRest; |
| if (Func.back() == '\n') |
| Func.pop_back(); |
| push_back({NStart, NStart + NExtent, Func}); |
| } |
| if (Line) |
| free(Line); |
| |
| #ifdef _WIN32 |
| _pclose(Stream); |
| #else |
| fclose(Stream); |
| wait(NULL); |
| #endif |
| } |
| |
| void reset(Map *M) { |
| clear(); |
| |
| // Take possible difference between "offset" and "virtual address" of |
| // the executable segment into account. |
| uint64_t FileOffset, VAddr; |
| fetchExecSegment(M, &FileOffset, &VAddr); |
| M->VAddrToFileOffset = FileOffset - VAddr; |
| |
| // Fetch both dynamic and static symbols, sort and unique them. |
| fetchSymbols(M); |
| |
| std::sort(begin(), end()); |
| auto NewEnd = std::unique(begin(), end()); |
| erase(NewEnd, end()); |
| } |
| |
| protected: |
| int splitLine(const std::string& line, std::vector<std::string>& output, char delim = ' ') { |
| std::stringstream ss(line); |
| std::string token; |
| while (std::getline(ss, token, delim)) { |
| output.push_back(token); |
| } |
| return (int)output.size(); |
| } |
| }; |
| |
| class ObjdumpOutput { |
| public: |
| std::string Objdump, BinaryCacheRoot; |
| FILE *Stream; |
| const char *ThisText; |
| uint64_t ThisAddress; |
| uint64_t EndAddress; |
| char *Line; |
| size_t LineLen; |
| |
| ObjdumpOutput(std::string Objdump, std::string BinaryCacheRoot) |
| : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot), Stream(nullptr), |
| Line(NULL), LineLen(0) {} |
| ~ObjdumpOutput() { |
| if (Stream) { |
| #ifdef _WIN32 |
| _pclose(Stream); |
| #else |
| fclose(Stream); |
| wait(NULL); |
| #endif |
| } |
| if (Line) |
| free(Line); |
| } |
| |
| void reset(Map *M, uint64_t Start, uint64_t Stop) { |
| ThisAddress = 0; |
| ThisText = ""; |
| if (Stream) { |
| #ifdef _WIN32 |
| _pclose(Stream); |
| #else |
| fclose(Stream); |
| wait(NULL); |
| #endif |
| } |
| |
| char buf1[32], buf2[32]; |
| sprintf(buf1, "%#" PRIx64, Start); |
| sprintf(buf2, "%#" PRIx64, Stop + 4); |
| |
| std::string Cmd = Objdump + " -d --no-show-raw-insn --start-address=" + |
| std::string(buf1) + " --stop-address=" + |
| std::string(buf2) + " " + |
| BinaryCacheRoot + std::string(M->Filename) + |
| #ifdef _WIN32 |
| " 2> NUL"; |
| #else |
| " 2>/dev/null"; |
| #endif |
| Stream = ForkAndExec(Cmd); |
| |
| EndAddress = Stop; |
| }; |
| |
| std::string getText() { return ThisText; } |
| |
| uint64_t next() { |
| getLine(); |
| return ThisAddress; |
| } |
| |
| void getLine() { |
| while (true) { |
| ssize_t Len = getline(&Line, &LineLen, Stream); |
| if (Len == -1) { |
| ThisAddress = EndAddress; |
| ThisText = ""; |
| return; |
| } |
| char *TokBuf; |
| char *One = strtok_r(Line, ":", &TokBuf); |
| char *Two = strtok_r(NULL, "\n", &TokBuf); |
| if (!One || !Two) |
| continue; |
| char *EndPtr = NULL; |
| uint64_t Address = strtoull(One, &EndPtr, 16); |
| if (EndPtr != &One[strlen(One)]) |
| continue; |
| |
| ThisAddress = Address; |
| ThisText = Two; |
| break; |
| } |
| } |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // PerfReader |
| //===----------------------------------------------------------------------===// |
| |
| class PerfReader { |
| public: |
| PerfReader(const std::string &Filename, std::string Objdump, |
| std::string BinaryCacheRoot); |
| ~PerfReader(); |
| |
| void readHeader(); |
| void readAttrs(); |
| void readEventDesc(); |
| void readDataStream(); |
| void registerNewMapping(unsigned char *Buf, const char *FileName); |
| unsigned char *readEvent(unsigned char *); |
| perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout); |
| void emitLine(uint64_t PC, std::map<const char *, uint64_t> *Counters, |
| const std::string &Text); |
| void emitFunctionStart(std::string &Name); |
| void emitFunctionEnd(std::string &Name, |
| std::map<const char *, uint64_t> &Counters); |
| void emitTopLevelCounters(); |
| void emitMaps(); |
| void emitSymbol( |
| Symbol &Sym, Map &M, |
| std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event, |
| std::map<const char *, uint64_t> &SymEvents); |
| PyObject *complete(); |
| |
| private: |
| unsigned char *Buffer; |
| #ifdef _WIN32 |
| HANDLE hFile; |
| HANDLE hMapFile; |
| #else |
| size_t BufferLen; |
| #endif |
| |
| perf_header *Header; |
| std::map<uint64_t, const char *> EventIDs; |
| std::map<uint64_t, uint64_t> EventLayouts; |
| std::map<size_t, std::map<uint64_t, std::map<const char *, uint64_t>>> Events; |
| std::map<const char *, uint64_t> TotalEvents; |
| std::map<uint64_t, std::map<const char *, uint64_t>> TotalEventsPerMap; |
| std::vector<Map> Maps; |
| std::map<uint64_t, std::map<uint64_t, EventDesc>> CurrentMaps; |
| |
| PyObject *Functions, *TopLevelCounters; |
| std::vector<PyObject*> Lines; |
| |
| std::string Objdump, BinaryCacheRoot; |
| }; |
| |
| PerfReader::PerfReader(const std::string &Filename, std::string Objdump, |
| std::string BinaryCacheRoot) |
| : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) { |
| TopLevelCounters = PyDict_New(); |
| Functions = PyDict_New(); |
| #ifdef _WIN32 |
| Buffer = nullptr; |
| hMapFile = nullptr; |
| hFile = ::CreateFileA(Filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, |
| OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); |
| assert(hFile != INVALID_HANDLE_VALUE); |
| LARGE_INTEGER size; |
| size.QuadPart = 0; |
| assert(::GetFileSizeEx(hFile, &size) != FALSE); |
| hMapFile = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, size.HighPart, size.LowPart, NULL); |
| assert(hMapFile != nullptr); |
| Buffer = (unsigned char*)::MapViewOfFile(hMapFile, FILE_MAP_READ, 0, 0, 0); |
| assert(Buffer != nullptr); |
| #else |
| int fd = open(Filename.c_str(), O_RDONLY); |
| assert(fd > 0); |
| |
| struct stat sb; |
| assert(fstat(fd, &sb) == 0); |
| BufferLen = (size_t)sb.st_size; |
| |
| Buffer = (unsigned char *)mmap(NULL, BufferLen, PROT_READ, MAP_SHARED, fd, 0); |
| assert(Buffer != MAP_FAILED); |
| #endif |
| } |
| |
| PerfReader::~PerfReader() { |
| #ifdef _WIN32 |
| if (hMapFile != NULL) { |
| if (Buffer != NULL) ::UnmapViewOfFile(Buffer); |
| ::CloseHandle(hMapFile); |
| } |
| if (hFile != INVALID_HANDLE_VALUE) ::CloseHandle(hFile); |
| #else |
| munmap(Buffer, BufferLen); |
| #endif |
| } |
| |
| void PerfReader::readHeader() { |
| Header = (perf_header *)&Buffer[0]; |
| |
| assert(!strncmp(Header->magic, "PERFILE2", 8)); |
| } |
| |
| void PerfReader::readDataStream() { |
| unsigned char *Buf = &Buffer[Header->data.offset]; |
| unsigned char *End = Buf + Header->data.size; |
| while (Buf < End) |
| Buf = readEvent(Buf); |
| } |
| |
| #define HEADER_EVENT_DESC 12 |
| |
| void PerfReader::readAttrs() { |
| if (Header->flags & (1U << HEADER_EVENT_DESC)) { |
| readEventDesc(); |
| } else { |
| uint64_t NumEvents = Header->attrs.size / Header->attr_size; |
| for (unsigned I = 0; I < NumEvents; ++I) { |
| const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size]; |
| const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size); |
| unsigned char* Buf = &Buffer[ids->offset]; |
| uint64_t NumIDs = ids->size / sizeof(uint64_t); |
| |
| const char* Str = "unknown"; |
| switch (attr->type) { |
| case PERF_TYPE_HARDWARE: |
| if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config]; |
| break; |
| case PERF_TYPE_SOFTWARE: |
| if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config]; |
| break; |
| } |
| |
| // Weirdness of perf: if there is only one event descriptor, that |
| // event descriptor can be referred to by ANY id! |
| if (NumEvents == 1) { |
| EventIDs[0] = Str; |
| EventLayouts[0] = attr->sample_type; |
| } |
| |
| for (unsigned J = 0; J < NumIDs; ++J) { |
| auto id = TakeU64(Buf); |
| EventIDs[id] = Str; |
| EventLayouts[id] = attr->sample_type; |
| } |
| } |
| } |
| } |
| |
| void PerfReader::readEventDesc() { |
| perf_file_section *P = |
| (perf_file_section *)&Buffer[Header->data.offset + Header->data.size]; |
| for (int I = 0; I < HEADER_EVENT_DESC; ++I) |
| if (Header->flags & (1ULL << I)) |
| ++P; |
| |
| unsigned char *Buf = &Buffer[P->offset]; |
| uint32_t NumEvents = TakeU32(Buf); |
| uint32_t AttrSize = TakeU32(Buf); |
| for (unsigned I = 0; I < NumEvents; ++I) { |
| // Parse out the "config" bitmask for the struct layout. |
| auto *Buf2 = Buf; |
| (void)TakeU32(Buf2); (void)TakeU32(Buf2); (void)TakeU64(Buf2); (void)TakeU64(Buf2); |
| auto Layout = TakeU64(Buf2); |
| |
| Buf += AttrSize; |
| uint32_t NumIDs = TakeU32(Buf); |
| |
| uint32_t StrLen = TakeU32(Buf); |
| const char *Str = (const char *)Buf; |
| Buf += StrLen; |
| |
| // Weirdness of perf: if there is only one event descriptor, that |
| // event descriptor can be referred to by ANY id! |
| if (NumEvents == 1) { |
| EventIDs[0] = Str; |
| EventLayouts[0] = Layout; |
| } |
| |
| for (unsigned J = 0; J < NumIDs; ++J) { |
| auto L = TakeU64(Buf); |
| EventIDs[L] = Str; |
| EventLayouts[L] = Layout; |
| } |
| } |
| } |
| |
| static uint64_t getTimeFromSampleId(unsigned char *EndOfStruct, |
| uint64_t Layout) { |
| uint64_t *Ptr = (uint64_t *)EndOfStruct; |
| // Each of the PERF_SAMPLE_* bits tested below adds an 8-byte field. |
| if (Layout & PERF_SAMPLE_IDENTIFIER) |
| --Ptr; |
| if (Layout & PERF_SAMPLE_CPU) |
| --Ptr; |
| if (Layout & PERF_SAMPLE_STREAM_ID) |
| --Ptr; |
| if (Layout & PERF_SAMPLE_ID) |
| --Ptr; |
| assert(Layout & PERF_SAMPLE_TIME); |
| --Ptr; |
| return *Ptr; |
| } |
| |
| void PerfReader::registerNewMapping(unsigned char *Buf, const char *Filename) { |
| perf_event_mmap_common *E = (perf_event_mmap_common *)Buf; |
| auto MapID = Maps.size(); |
| |
| uint64_t End = E->start + E->extent; |
| Map NewMapping(E->start, End, Filename); |
| NewMapping.FileToPCOffset = E->start - E->pgoff; |
| Maps.push_back(NewMapping); |
| |
| unsigned char *EndOfEvent = Buf + E->header.size; |
| // FIXME: The first EventID is used for every event. |
| // FIXME: The code assumes perf_event_attr.sample_id_all is set. |
| uint64_t Time = getTimeFromSampleId(EndOfEvent, EventLayouts.begin()->second); |
| auto &CurrentMap = CurrentMaps[Time]; |
| CurrentMap.insert({E->start, { E->start, End, MapID}}); |
| } |
| |
| unsigned char *PerfReader::readEvent(unsigned char *Buf) { |
| perf_event_header *E = (perf_event_header *)Buf; |
| switch (E->type) { |
| case PERF_RECORD_MMAP: |
| { |
| perf_event_mmap *E = (perf_event_mmap *)Buf; |
| registerNewMapping(Buf, E->filename); |
| } |
| break; |
| case PERF_RECORD_MMAP2: |
| { |
| perf_event_mmap2 *E = (perf_event_mmap2 *)Buf; |
| if (!(E->prot & PROT_EXEC)) |
| break; |
| registerNewMapping(Buf, E->filename); |
| } |
| break; |
| case PERF_RECORD_SAMPLE: |
| { |
| perf_event_sample* E = (perf_event_sample*)Buf; |
| auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), |
| EventLayouts.begin()->second); |
| auto EventID = NewE.id; |
| auto PC = NewE.ip; |
| |
| // Search for the map corresponding to this sample. Search backwards through |
| // time, discarding any maps created after our timestamp. |
| uint64_t MapID = ~0ULL; |
| for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); |
| I != E; ++I) { |
| if (I->first > NewE.time) |
| continue; |
| |
| auto NewI = I->second.upper_bound(PC); |
| if (NewI == I->second.begin()) |
| continue; |
| --NewI; |
| |
| if (NewI->second.Start > PC || NewI->second.End < PC) |
| continue; |
| MapID = NewI->second.MapId; |
| break; |
| } |
| if (MapID != ~0ULL) { |
| assert(EventIDs.count(EventID)); |
| Events[MapID][PC][EventIDs[EventID]] += NewE.period; |
| |
| TotalEvents[EventIDs[EventID]] += NewE.period; |
| TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; |
| } |
| } |
| break; |
| } |
| return &Buf[E->size]; |
| } |
| |
| perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) { |
| perf_event_sample E; |
| memset((char*)&E, 0, sizeof(E)); |
| |
| assert(Layout & PERF_SAMPLE_IP); |
| assert(Layout & PERF_SAMPLE_PERIOD); |
| |
| if (Layout & PERF_SAMPLE_IDENTIFIER) |
| E.id = TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_IP) |
| E.ip = TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_TID) { |
| E.pid = TakeU32(Buf); |
| E.tid = TakeU32(Buf); |
| } |
| if (Layout & PERF_SAMPLE_TIME) |
| E.time = TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_ADDR) |
| (void) TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_ID) |
| E.id = TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_STREAM_ID) |
| (void) TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_CPU) |
| (void) TakeU64(Buf); |
| if (Layout & PERF_SAMPLE_PERIOD) |
| E.period = TakeU64(Buf); |
| |
| return E; |
| } |
| |
| void PerfReader::emitFunctionStart(std::string &Name) { |
| Lines.clear(); |
| } |
| |
| void PerfReader::emitFunctionEnd(std::string &Name, |
| std::map<const char *, uint64_t> &Counters) { |
| auto *CounterDict = PyDict_New(); |
| for (auto &KV : Counters) |
| PyDict_SetItemString(CounterDict, KV.first, |
| PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); |
| |
| auto *LinesList = PyList_New(Lines.size()); |
| unsigned Idx = 0; |
| for (auto *I : Lines) |
| PyList_SetItem(LinesList, Idx++, I); |
| |
| auto *FnDict = PyDict_New(); |
| PyDict_SetItemString(FnDict, "counters", CounterDict); |
| PyDict_SetItemString(FnDict, "data", LinesList); |
| |
| PyDict_SetItemString(Functions, Name.c_str(), FnDict); |
| } |
| |
| void PerfReader::emitLine(uint64_t PC, |
| std::map<const char *, uint64_t> *Counters, |
| const std::string &Text) { |
| auto *CounterDict = PyDict_New(); |
| if (Counters) |
| for (auto &KV : *Counters) |
| PyDict_SetItemString(CounterDict, KV.first, |
| PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); |
| |
| auto *Line = Py_BuildValue("[NKs]", |
| CounterDict, |
| (unsigned long long) PC, |
| (char*) Text.c_str()); |
| Lines.push_back(Line); |
| } |
| |
| void PerfReader::emitTopLevelCounters() { |
| for (auto &KV : TotalEvents) |
| PyDict_SetItemString(TopLevelCounters, KV.first, |
| PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); |
| } |
| |
| void PerfReader::emitMaps() { |
| for (auto &KV : Events) { |
| auto MapID = KV.first; |
| auto &MapEvents = KV.second; |
| |
| if (MapEvents.empty()) |
| continue; |
| |
| if (MapID > Maps.size()) { |
| // Something went badly wrong. Try and recover. |
| continue; |
| } |
| |
| // Are there enough events here to bother with? |
| bool AllUnderThreshold = true; |
| for (auto &I : TotalEventsPerMap[MapID]) { |
| auto Total = TotalEvents[I.first]; |
| // If a map contains more than 1% of some event, bother with it. |
| if ((float)I.second / (float)Total > 0.01f) { |
| AllUnderThreshold = false; |
| break; |
| } |
| } |
| if (AllUnderThreshold) |
| continue; |
| |
| Map &M = Maps[MapID]; |
| SymTabOutput Syms(Objdump, BinaryCacheRoot); |
| Syms.reset(&M); |
| |
| uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset; |
| |
| // Accumulate the event totals for each symbol |
| auto Sym = Syms.begin(); |
| auto Event = MapEvents.begin(); |
| std::map<uint64_t, std::map<const char*, uint64_t>> SymToEventTotals; |
| while (Event != MapEvents.end() && Sym != Syms.end()) { |
| // Skip events until we find one after the start of Sym |
| auto VAddr = Event->first - VAddrToPCOffset; |
| if (VAddr < Sym->Start) { |
| ++Event; |
| continue; |
| } |
| // Skip symbols until the event is before the end of Sym |
| if (VAddr >= Sym->End) { |
| ++Sym; |
| continue; |
| } |
| // We now know that Event lies within Sym, so add it to the totals |
| for (auto &KV : Event->second) |
| SymToEventTotals[Sym->Start][KV.first] += KV.second; |
| ++Event; |
| } |
| |
| // Emit only symbols that took up > 0.5% of any counter |
| for (auto &Sym : Syms) { |
| bool Keep = false; |
| for (auto &KV : SymToEventTotals[Sym.Start]) { |
| if ((double)KV.second / (double)TotalEvents[KV.first] > 0.005) { |
| Keep = true; |
| break; |
| } |
| } |
| if (Keep) |
| emitSymbol(Sym, M, MapEvents.lower_bound(Sym.Start + VAddrToPCOffset), |
| SymToEventTotals[Sym.Start]); |
| } |
| } |
| } |
| |
| void PerfReader::emitSymbol( |
| Symbol &Sym, Map &M, |
| std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event, |
| std::map<const char *, uint64_t> &SymEvents) { |
| uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset; |
| ObjdumpOutput Dump(Objdump, BinaryCacheRoot); |
| Dump.reset(&M, Sym.Start, Sym.End); |
| |
| emitFunctionStart(Sym.Name); |
| assert(Sym.Start <= Event->first - VAddrToPCOffset && |
| Event->first - VAddrToPCOffset < Sym.End); |
| for (uint64_t I = Dump.next(); I < Sym.End; I = Dump.next()) { |
| auto VAddr = Event->first - VAddrToPCOffset; |
| |
| auto Text = Dump.getText(); |
| if (VAddr == I) { |
| emitLine(I, &Event->second, Text); |
| ++Event; |
| } else { |
| emitLine(I, nullptr, Text); |
| } |
| } |
| emitFunctionEnd(Sym.Name, SymEvents); |
| } |
| |
| PyObject *PerfReader::complete() { |
| auto *Obj = PyDict_New(); |
| PyDict_SetItemString(Obj, "counters", TopLevelCounters); |
| PyDict_SetItemString(Obj, "functions", Functions); |
| return Obj; |
| } |
| |
| #ifndef STANDALONE |
| static PyObject *cPerf_importPerf(PyObject *self, PyObject *args) { |
| const char *Fname; |
| const char *Objdump = "objdump"; |
| const char *BinaryCacheRoot = ""; |
| if (!PyArg_ParseTuple(args, "s|ss", &Fname, &Objdump, &BinaryCacheRoot)) |
| return NULL; |
| |
| try { |
| PerfReader P(Fname, Objdump, BinaryCacheRoot); |
| P.readHeader(); |
| P.readAttrs(); |
| P.readDataStream(); |
| P.emitTopLevelCounters(); |
| P.emitMaps(); |
| return P.complete(); |
| } catch (std::logic_error &E) { |
| PyErr_SetString(PyExc_AssertionError, E.what()); |
| return NULL; |
| } catch (std::runtime_error &E) { |
| PyErr_SetString(PyExc_RuntimeError, E.what()); |
| return NULL; |
| } catch (...) { |
| PyErr_SetString(PyExc_RuntimeError, "Unknown error"); |
| return NULL; |
| } |
| } |
| |
| static PyMethodDef cPerfMethods[] = {{"importPerf", cPerf_importPerf, |
| METH_VARARGS, |
| "Import perf.data from a filename"}, |
| {NULL, NULL, 0, NULL}}; |
| |
| static PyModuleDef cPerfModuleDef = {PyModuleDef_HEAD_INIT, |
| "cPerf", |
| nullptr, |
| -1, |
| cPerfMethods, |
| nullptr, |
| nullptr, |
| nullptr, |
| nullptr}; |
| |
| PyMODINIT_FUNC PyInit_cPerf(void) { |
| return PyModule_Create(&cPerfModuleDef); |
| } |
| |
| #else // STANDALONE |
| |
| // You can build the standalone version for a debug purpose using |
| // clang++ cPerf.cpp -o /tmp/cPerf -DSTANDALONE $(pkg-config --cflags --libs python3-embed) |
| |
// Returns the value of the environment variable VarName, or DefaultValue if
// the variable is not set. The value is copied into a std::string at once,
// because getenv() is permitted to overwrite its return value on subsequent
// calls.
static std::string getEnvVar(const char *VarName, const char *DefaultValue) {
  if (const char *Value = getenv(VarName))
    return Value;
  return DefaultValue;
}
| |
| int main(int argc, char **argv) { |
| Py_Initialize(); |
| if (argc < 2) return -1; |
| |
| std::string BinaryCacheRoot = getEnvVar("LNT_BINARY_CACHE_ROOT", ""); |
| std::string Objdump = getEnvVar("CMAKE_OBJDUMP", "objdump"); |
| |
| PerfReader P(argv[1], Objdump, BinaryCacheRoot); |
| P.readHeader(); |
| P.readAttrs(); |
| P.readDataStream(); |
| P.emitTopLevelCounters(); |
| P.emitMaps(); |
| PyObject_Print(P.complete(), stdout, Py_PRINT_RAW); |
| fputs("\n", stdout); // Usually expected by UNIX shells |
| Py_FinalizeEx(); |
| return 0; |
| } |
| |
| #endif |