| //===-- memprof_allocator.cpp --------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file is a part of MemProfiler, a memory profiler. |
| // |
| // Implementation of MemProf's memory allocator, which uses the allocator |
| // from sanitizer_common. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "memprof_allocator.h" |
| #include "memprof_mapping.h" |
| #include "memprof_stack.h" |
| #include "memprof_thread.h" |
| #include "sanitizer_common/sanitizer_allocator_checks.h" |
| #include "sanitizer_common/sanitizer_allocator_interface.h" |
| #include "sanitizer_common/sanitizer_allocator_report.h" |
| #include "sanitizer_common/sanitizer_errno.h" |
| #include "sanitizer_common/sanitizer_file.h" |
| #include "sanitizer_common/sanitizer_flags.h" |
| #include "sanitizer_common/sanitizer_internal_defs.h" |
| #include "sanitizer_common/sanitizer_list.h" |
| #include "sanitizer_common/sanitizer_stackdepot.h" |
| |
| #include <sched.h> |
| #include <stdlib.h> |
| #include <time.h> |
| |
| namespace __memprof { |
| |
| static int GetCpuId(void) { |
| // _memprof_preinit is called via the preinit_array, which subsequently calls |
| // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu |
| // will seg fault as the address of __vdso_getcpu will be null. |
| if (!memprof_init_done) |
| return -1; |
| return sched_getcpu(); |
| } |
| |
| // Compute the timestamp in ms. |
| static int GetTimestamp(void) { |
| // timespec_get will segfault if called from dl_init |
| if (!memprof_timestamp_inited) { |
| // By returning 0, this will be effectively treated as being |
| // timestamped at memprof init time (when memprof_init_timestamp_s |
| // is initialized). |
| return 0; |
| } |
| timespec ts; |
| clock_gettime(CLOCK_REALTIME, &ts); |
| return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000; |
| } |
| |
| static MemprofAllocator &get_allocator(); |
| |
| // The memory chunk allocated from the underlying allocator looks like this: |
| // H H U U U U U U |
| // H -- ChunkHeader (32 bytes) |
| // U -- user memory. |
| |
| // If there is left padding before the ChunkHeader (due to use of memalign), |
| // we store a magic value in the first uptr word of the memory block and |
| // store the address of ChunkHeader in the next uptr. |
| // M B L L L L L L L L L H H U U U U U U |
| // | ^ |
| // ---------------------| |
| // M -- magic value kAllocBegMagic |
| // B -- address of ChunkHeader pointing to the first 'H' |
| |
| constexpr uptr kMaxAllowedMallocBits = 40; |
| |
| // Should be no more than 32-bytes |
| struct ChunkHeader { |
| // 1-st 4 bytes. |
| u32 alloc_context_id; |
| // 2-nd 4 bytes |
| u32 cpu_id; |
| // 3-rd 4 bytes |
| u32 timestamp_ms; |
| // 4-th 4 bytes |
| // Note only 1 bit is needed for this flag if we need space in the future for |
| // more fields. |
| u32 from_memalign; |
| // 5-th and 6-th 4 bytes |
| // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this |
| // could be shrunk to kMaxAllowedMallocBits if we need space in the future for |
| // more fields. |
| atomic_uint64_t user_requested_size; |
| // 23 bits available |
| // 7-th and 8-th 4 bytes |
| u64 data_type_id; // TODO: hash of type name |
| }; |
| |
| static const uptr kChunkHeaderSize = sizeof(ChunkHeader); |
| COMPILER_CHECK(kChunkHeaderSize == 32); |
| |
| struct MemprofChunk : ChunkHeader { |
| uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; } |
| uptr UsedSize() { |
| return atomic_load(&user_requested_size, memory_order_relaxed); |
| } |
| void *AllocBeg() { |
| if (from_memalign) |
| return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this)); |
| return reinterpret_cast<void *>(this); |
| } |
| }; |
| |
| class LargeChunkHeader { |
| static constexpr uptr kAllocBegMagic = |
| FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL); |
| atomic_uintptr_t magic; |
| MemprofChunk *chunk_header; |
| |
| public: |
| MemprofChunk *Get() const { |
| return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic |
| ? chunk_header |
| : nullptr; |
| } |
| |
| void Set(MemprofChunk *p) { |
| if (p) { |
| chunk_header = p; |
| atomic_store(&magic, kAllocBegMagic, memory_order_release); |
| return; |
| } |
| |
| uptr old = kAllocBegMagic; |
| if (!atomic_compare_exchange_strong(&magic, &old, 0, |
| memory_order_release)) { |
| CHECK_EQ(old, kAllocBegMagic); |
| } |
| } |
| }; |
| |
| void FlushUnneededMemProfShadowMemory(uptr p, uptr size) { |
| // Since memprof's mapping is compacting, the shadow chunk may be |
| // not page-aligned, so we only flush the page-aligned portion. |
| ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size)); |
| } |
| |
| void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const { |
| // Statistics. |
| MemprofStats &thread_stats = GetCurrentThreadStats(); |
| thread_stats.mmaps++; |
| thread_stats.mmaped += size; |
| } |
| void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const { |
| // We are about to unmap a chunk of user memory. |
| // Mark the corresponding shadow memory as not needed. |
| FlushUnneededMemProfShadowMemory(p, size); |
| // Statistics. |
| MemprofStats &thread_stats = GetCurrentThreadStats(); |
| thread_stats.munmaps++; |
| thread_stats.munmaped += size; |
| } |
| |
| AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) { |
| CHECK(ms); |
| return &ms->allocator_cache; |
| } |
| |
| struct MemInfoBlock { |
| u32 alloc_count; |
| u64 total_access_count, min_access_count, max_access_count; |
| u64 total_size; |
| u32 min_size, max_size; |
| u32 alloc_timestamp, dealloc_timestamp; |
| u64 total_lifetime; |
| u32 min_lifetime, max_lifetime; |
| u32 alloc_cpu_id, dealloc_cpu_id; |
| u32 num_migrated_cpu; |
| |
| // Only compared to prior deallocated object currently. |
| u32 num_lifetime_overlaps; |
| u32 num_same_alloc_cpu; |
| u32 num_same_dealloc_cpu; |
| |
| u64 data_type_id; // TODO: hash of type name |
| |
| MemInfoBlock() : alloc_count(0) {} |
| |
| MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, |
| u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) |
| : alloc_count(1), total_access_count(access_count), |
| min_access_count(access_count), max_access_count(access_count), |
| total_size(size), min_size(size), max_size(size), |
| alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), |
| total_lifetime(dealloc_timestamp - alloc_timestamp), |
| min_lifetime(total_lifetime), max_lifetime(total_lifetime), |
| alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), |
| num_lifetime_overlaps(0), num_same_alloc_cpu(0), |
| num_same_dealloc_cpu(0) { |
| num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; |
| } |
| |
| void Print(u64 id) { |
| u64 p; |
| if (flags()->print_terse) { |
| p = total_size * 100 / alloc_count; |
| Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100, |
| min_size, max_size); |
| p = total_access_count * 100 / alloc_count; |
| Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count, |
| max_access_count); |
| p = total_lifetime * 100 / alloc_count; |
| Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime); |
| Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, |
| num_same_alloc_cpu, num_same_dealloc_cpu); |
| } else { |
| p = total_size * 100 / alloc_count; |
| Printf("Memory allocation stack id = %llu\n", id); |
| Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n", |
| alloc_count, p / 100, p % 100, min_size, max_size); |
| p = total_access_count * 100 / alloc_count; |
| Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100, |
| p % 100, min_access_count, max_access_count); |
| p = total_lifetime * 100 / alloc_count; |
| Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100, |
| min_lifetime, max_lifetime); |
| Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " |
| "cpu: %u, num same dealloc_cpu: %u\n", |
| num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, |
| num_same_dealloc_cpu); |
| } |
| } |
| |
| static void printHeader() { |
| CHECK(flags()->print_terse); |
| Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" |
| "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" |
| "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" |
| "NumSameDeallocCpu\n"); |
| } |
| |
| void Merge(MemInfoBlock &newMIB) { |
| alloc_count += newMIB.alloc_count; |
| |
| total_access_count += newMIB.total_access_count; |
| min_access_count = Min(min_access_count, newMIB.min_access_count); |
| max_access_count = Max(max_access_count, newMIB.max_access_count); |
| |
| total_size += newMIB.total_size; |
| min_size = Min(min_size, newMIB.min_size); |
| max_size = Max(max_size, newMIB.max_size); |
| |
| total_lifetime += newMIB.total_lifetime; |
| min_lifetime = Min(min_lifetime, newMIB.min_lifetime); |
| max_lifetime = Max(max_lifetime, newMIB.max_lifetime); |
| |
| // We know newMIB was deallocated later, so just need to check if it was |
| // allocated before last one deallocated. |
| num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; |
| alloc_timestamp = newMIB.alloc_timestamp; |
| dealloc_timestamp = newMIB.dealloc_timestamp; |
| |
| num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; |
| num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; |
| alloc_cpu_id = newMIB.alloc_cpu_id; |
| dealloc_cpu_id = newMIB.dealloc_cpu_id; |
| } |
| }; |
| |
| static u32 AccessCount = 0; |
| static u32 MissCount = 0; |
| |
| struct SetEntry { |
| SetEntry() : id(0), MIB() {} |
| bool Empty() { return id == 0; } |
| void Print() { |
| CHECK(!Empty()); |
| MIB.Print(id); |
| } |
| // The stack id |
| u64 id; |
| MemInfoBlock MIB; |
| }; |
| |
| struct CacheSet { |
| enum { kSetSize = 4 }; |
| |
| void PrintAll() { |
| for (int i = 0; i < kSetSize; i++) { |
| if (Entries[i].Empty()) |
| continue; |
| Entries[i].Print(); |
| } |
| } |
| void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { |
| AccessCount++; |
| SetAccessCount++; |
| |
| for (int i = 0; i < kSetSize; i++) { |
| auto id = Entries[i].id; |
| // Check if this is a hit or an empty entry. Since we always move any |
| // filled locations to the front of the array (see below), we don't need |
| // to look after finding the first empty entry. |
| if (id == new_id || !id) { |
| if (id == 0) { |
| Entries[i].id = new_id; |
| Entries[i].MIB = newMIB; |
| } else { |
| Entries[i].MIB.Merge(newMIB); |
| } |
| // Assuming some id locality, we try to swap the matching entry |
| // into the first set position. |
| if (i != 0) { |
| auto tmp = Entries[0]; |
| Entries[0] = Entries[i]; |
| Entries[i] = tmp; |
| } |
| return; |
| } |
| } |
| |
| // Miss |
| MissCount++; |
| SetMissCount++; |
| |
| // We try to find the entries with the lowest alloc count to be evicted: |
| int min_idx = 0; |
| u64 min_count = Entries[0].MIB.alloc_count; |
| for (int i = 1; i < kSetSize; i++) { |
| CHECK(!Entries[i].Empty()); |
| if (Entries[i].MIB.alloc_count < min_count) { |
| min_idx = i; |
| min_count = Entries[i].MIB.alloc_count; |
| } |
| } |
| |
| // Print the evicted entry profile information |
| if (!flags()->print_terse) |
| Printf("Evicted:\n"); |
| Entries[min_idx].Print(); |
| |
| // Similar to the hit case, put new MIB in first set position. |
| if (min_idx != 0) |
| Entries[min_idx] = Entries[0]; |
| Entries[0].id = new_id; |
| Entries[0].MIB = newMIB; |
| } |
| |
| void PrintMissRate(int i) { |
| u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0; |
| Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount, |
| SetAccessCount, p / 100, p % 100); |
| } |
| |
| SetEntry Entries[kSetSize]; |
| u32 SetAccessCount = 0; |
| u32 SetMissCount = 0; |
| }; |
| |
| struct MemInfoBlockCache { |
| MemInfoBlockCache() { |
| if (common_flags()->print_module_map) |
| DumpProcessMap(); |
| if (flags()->print_terse) |
| MemInfoBlock::printHeader(); |
| Sets = |
| (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); |
| Constructed = true; |
| } |
| |
| ~MemInfoBlockCache() { free(Sets); } |
| |
| void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { |
| u64 hv = new_id; |
| |
| // Use mod method where number of entries should be a prime close to power |
| // of 2. |
| hv %= flags()->mem_info_cache_entries; |
| |
| return Sets[hv].insertOrMerge(new_id, newMIB); |
| } |
| |
| void PrintAll() { |
| for (int i = 0; i < flags()->mem_info_cache_entries; i++) { |
| Sets[i].PrintAll(); |
| } |
| } |
| |
| void PrintMissRate() { |
| if (!flags()->print_mem_info_cache_miss_rate) |
| return; |
| u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0; |
| Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount, |
| p / 100, p % 100); |
| if (flags()->print_mem_info_cache_miss_rate_details) |
| for (int i = 0; i < flags()->mem_info_cache_entries; i++) |
| Sets[i].PrintMissRate(i); |
| } |
| |
| CacheSet *Sets; |
| // Flag when the Sets have been allocated, in case a deallocation is called |
| // very early before the static init of the Allocator and therefore this table |
| // have completed. |
| bool Constructed = false; |
| }; |
| |
| // Accumulates the access count from the shadow for the given pointer and size. |
| u64 GetShadowCount(uptr p, u32 size) { |
| u64 *shadow = (u64 *)MEM_TO_SHADOW(p); |
| u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size); |
| u64 count = 0; |
| for (; shadow <= shadow_end; shadow++) |
| count += *shadow; |
| return count; |
| } |
| |
| // Clears the shadow counters (when memory is allocated). |
| void ClearShadow(uptr addr, uptr size) { |
| CHECK(AddrIsAlignedByGranularity(addr)); |
| CHECK(AddrIsInMem(addr)); |
| CHECK(AddrIsAlignedByGranularity(addr + size)); |
| CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY)); |
| CHECK(REAL(memset)); |
| uptr shadow_beg = MEM_TO_SHADOW(addr); |
| uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1; |
| if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) { |
| REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); |
| } else { |
| uptr page_size = GetPageSizeCached(); |
| uptr page_beg = RoundUpTo(shadow_beg, page_size); |
| uptr page_end = RoundDownTo(shadow_end, page_size); |
| |
| if (page_beg >= page_end) { |
| REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); |
| } else { |
| if (page_beg != shadow_beg) { |
| REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg); |
| } |
| if (page_end != shadow_end) { |
| REAL(memset)((void *)page_end, 0, shadow_end - page_end); |
| } |
| ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr); |
| } |
| } |
| } |
| |
| struct Allocator { |
| static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits; |
| |
| MemprofAllocator allocator; |
| StaticSpinMutex fallback_mutex; |
| AllocatorCache fallback_allocator_cache; |
| |
| uptr max_user_defined_malloc_size; |
| atomic_uint8_t rss_limit_exceeded; |
| |
| MemInfoBlockCache MemInfoBlockTable; |
| bool destructing; |
| |
| // ------------------- Initialization ------------------------ |
| explicit Allocator(LinkerInitialized) : destructing(false) {} |
| |
| ~Allocator() { FinishAndPrint(); } |
| |
| void FinishAndPrint() { |
| if (!flags()->print_terse) |
| Printf("Live on exit:\n"); |
| allocator.ForceLock(); |
| allocator.ForEachChunk( |
| [](uptr chunk, void *alloc) { |
| u64 user_requested_size; |
| MemprofChunk *m = |
| ((Allocator *)alloc) |
| ->GetMemprofChunk((void *)chunk, user_requested_size); |
| if (!m) |
| return; |
| uptr user_beg = ((uptr)m) + kChunkHeaderSize; |
| u64 c = GetShadowCount(user_beg, user_requested_size); |
| long curtime = GetTimestamp(); |
| MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, |
| m->cpu_id, GetCpuId()); |
| ((Allocator *)alloc) |
| ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); |
| }, |
| this); |
| allocator.ForceUnlock(); |
| |
| destructing = true; |
| MemInfoBlockTable.PrintMissRate(); |
| MemInfoBlockTable.PrintAll(); |
| StackDepotPrintAll(); |
| } |
| |
| void InitLinkerInitialized() { |
| SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); |
| allocator.InitLinkerInitialized( |
| common_flags()->allocator_release_to_os_interval_ms); |
| max_user_defined_malloc_size = common_flags()->max_allocation_size_mb |
| ? common_flags()->max_allocation_size_mb |
| << 20 |
| : kMaxAllowedMallocSize; |
| } |
| |
| bool RssLimitExceeded() { |
| return atomic_load(&rss_limit_exceeded, memory_order_relaxed); |
| } |
| |
| void SetRssLimitExceeded(bool limit_exceeded) { |
| atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed); |
| } |
| |
| // -------------------- Allocation/Deallocation routines --------------- |
| void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack, |
| AllocType alloc_type) { |
| if (UNLIKELY(!memprof_inited)) |
| MemprofInitFromRtl(); |
| if (RssLimitExceeded()) { |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportRssLimitExceeded(stack); |
| } |
| CHECK(stack); |
| const uptr min_alignment = MEMPROF_ALIGNMENT; |
| if (alignment < min_alignment) |
| alignment = min_alignment; |
| if (size == 0) { |
| // We'd be happy to avoid allocating memory for zero-size requests, but |
| // some programs/tests depend on this behavior and assume that malloc |
| // would not return NULL even for zero-size allocations. Moreover, it |
| // looks like operator new should never return NULL, and results of |
| // consecutive "new" calls must be different even if the allocated size |
| // is zero. |
| size = 1; |
| } |
| CHECK(IsPowerOfTwo(alignment)); |
| uptr rounded_size = RoundUpTo(size, alignment); |
| uptr needed_size = rounded_size + kChunkHeaderSize; |
| if (alignment > min_alignment) |
| needed_size += alignment; |
| CHECK(IsAligned(needed_size, min_alignment)); |
| if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize || |
| size > max_user_defined_malloc_size) { |
| if (AllocatorMayReturnNull()) { |
| Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", |
| (void *)size); |
| return nullptr; |
| } |
| uptr malloc_limit = |
| Min(kMaxAllowedMallocSize, max_user_defined_malloc_size); |
| ReportAllocationSizeTooBig(size, malloc_limit, stack); |
| } |
| |
| MemprofThread *t = GetCurrentThread(); |
| void *allocated; |
| if (t) { |
| AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); |
| allocated = allocator.Allocate(cache, needed_size, 8); |
| } else { |
| SpinMutexLock l(&fallback_mutex); |
| AllocatorCache *cache = &fallback_allocator_cache; |
| allocated = allocator.Allocate(cache, needed_size, 8); |
| } |
| if (UNLIKELY(!allocated)) { |
| SetAllocatorOutOfMemory(); |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportOutOfMemory(size, stack); |
| } |
| |
| uptr alloc_beg = reinterpret_cast<uptr>(allocated); |
| uptr alloc_end = alloc_beg + needed_size; |
| uptr beg_plus_header = alloc_beg + kChunkHeaderSize; |
| uptr user_beg = beg_plus_header; |
| if (!IsAligned(user_beg, alignment)) |
| user_beg = RoundUpTo(user_beg, alignment); |
| uptr user_end = user_beg + size; |
| CHECK_LE(user_end, alloc_end); |
| uptr chunk_beg = user_beg - kChunkHeaderSize; |
| MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); |
| m->from_memalign = alloc_beg != chunk_beg; |
| CHECK(size); |
| |
| m->cpu_id = GetCpuId(); |
| m->timestamp_ms = GetTimestamp(); |
| m->alloc_context_id = StackDepotPut(*stack); |
| |
| uptr size_rounded_down_to_granularity = |
| RoundDownTo(size, SHADOW_GRANULARITY); |
| if (size_rounded_down_to_granularity) |
| ClearShadow(user_beg, size_rounded_down_to_granularity); |
| |
| MemprofStats &thread_stats = GetCurrentThreadStats(); |
| thread_stats.mallocs++; |
| thread_stats.malloced += size; |
| thread_stats.malloced_overhead += needed_size - size; |
| if (needed_size > SizeClassMap::kMaxSize) |
| thread_stats.malloc_large++; |
| else |
| thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++; |
| |
| void *res = reinterpret_cast<void *>(user_beg); |
| atomic_store(&m->user_requested_size, size, memory_order_release); |
| if (alloc_beg != chunk_beg) { |
| CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg); |
| reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m); |
| } |
| MEMPROF_MALLOC_HOOK(res, size); |
| return res; |
| } |
| |
| void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment, |
| BufferedStackTrace *stack, AllocType alloc_type) { |
| uptr p = reinterpret_cast<uptr>(ptr); |
| if (p == 0) |
| return; |
| |
| MEMPROF_FREE_HOOK(ptr); |
| |
| uptr chunk_beg = p - kChunkHeaderSize; |
| MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); |
| |
| u64 user_requested_size = |
| atomic_exchange(&m->user_requested_size, 0, memory_order_acquire); |
| if (memprof_inited && memprof_init_done && !destructing && |
| MemInfoBlockTable.Constructed) { |
| u64 c = GetShadowCount(p, user_requested_size); |
| long curtime = GetTimestamp(); |
| |
| MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, |
| m->cpu_id, GetCpuId()); |
| { |
| SpinMutexLock l(&fallback_mutex); |
| MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); |
| } |
| } |
| |
| MemprofStats &thread_stats = GetCurrentThreadStats(); |
| thread_stats.frees++; |
| thread_stats.freed += user_requested_size; |
| |
| void *alloc_beg = m->AllocBeg(); |
| if (alloc_beg != m) { |
| // Clear the magic value, as allocator internals may overwrite the |
| // contents of deallocated chunk, confusing GetMemprofChunk lookup. |
| reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr); |
| } |
| |
| MemprofThread *t = GetCurrentThread(); |
| if (t) { |
| AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); |
| allocator.Deallocate(cache, alloc_beg); |
| } else { |
| SpinMutexLock l(&fallback_mutex); |
| AllocatorCache *cache = &fallback_allocator_cache; |
| allocator.Deallocate(cache, alloc_beg); |
| } |
| } |
| |
| void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) { |
| CHECK(old_ptr && new_size); |
| uptr p = reinterpret_cast<uptr>(old_ptr); |
| uptr chunk_beg = p - kChunkHeaderSize; |
| MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); |
| |
| MemprofStats &thread_stats = GetCurrentThreadStats(); |
| thread_stats.reallocs++; |
| thread_stats.realloced += new_size; |
| |
| void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC); |
| if (new_ptr) { |
| CHECK_NE(REAL(memcpy), nullptr); |
| uptr memcpy_size = Min(new_size, m->UsedSize()); |
| REAL(memcpy)(new_ptr, old_ptr, memcpy_size); |
| Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC); |
| } |
| return new_ptr; |
| } |
| |
| void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { |
| if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportCallocOverflow(nmemb, size, stack); |
| } |
| void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC); |
| // If the memory comes from the secondary allocator no need to clear it |
| // as it comes directly from mmap. |
| if (ptr && allocator.FromPrimary(ptr)) |
| REAL(memset)(ptr, 0, nmemb * size); |
| return ptr; |
| } |
| |
| void CommitBack(MemprofThreadLocalMallocStorage *ms, |
| BufferedStackTrace *stack) { |
| AllocatorCache *ac = GetAllocatorCache(ms); |
| allocator.SwallowCache(ac); |
| } |
| |
| // -------------------------- Chunk lookup ---------------------- |
| |
| // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). |
| MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) { |
| if (!alloc_beg) |
| return nullptr; |
| MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get(); |
| if (!p) { |
| if (!allocator.FromPrimary(alloc_beg)) |
| return nullptr; |
| p = reinterpret_cast<MemprofChunk *>(alloc_beg); |
| } |
| // The size is reset to 0 on deallocation (and a min of 1 on |
| // allocation). |
| user_requested_size = |
| atomic_load(&p->user_requested_size, memory_order_acquire); |
| if (user_requested_size) |
| return p; |
| return nullptr; |
| } |
| |
| MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) { |
| void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p)); |
| return GetMemprofChunk(alloc_beg, user_requested_size); |
| } |
| |
| uptr AllocationSize(uptr p) { |
| u64 user_requested_size; |
| MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size); |
| if (!m) |
| return 0; |
| if (m->Beg() != p) |
| return 0; |
| return user_requested_size; |
| } |
| |
| void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); } |
| |
| void PrintStats() { allocator.PrintStats(); } |
| |
| void ForceLock() { |
| allocator.ForceLock(); |
| fallback_mutex.Lock(); |
| } |
| |
| void ForceUnlock() { |
| fallback_mutex.Unlock(); |
| allocator.ForceUnlock(); |
| } |
| }; |
| |
| static Allocator instance(LINKER_INITIALIZED); |
| |
| static MemprofAllocator &get_allocator() { return instance.allocator; } |
| |
| void InitializeAllocator() { instance.InitLinkerInitialized(); } |
| |
| void MemprofThreadLocalMallocStorage::CommitBack() { |
| GET_STACK_TRACE_MALLOC; |
| instance.CommitBack(this, &stack); |
| } |
| |
| void PrintInternalAllocatorStats() { instance.PrintStats(); } |
| |
| void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) { |
| instance.Deallocate(ptr, 0, 0, stack, alloc_type); |
| } |
| |
| void memprof_delete(void *ptr, uptr size, uptr alignment, |
| BufferedStackTrace *stack, AllocType alloc_type) { |
| instance.Deallocate(ptr, size, alignment, stack, alloc_type); |
| } |
| |
| void *memprof_malloc(uptr size, BufferedStackTrace *stack) { |
| return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); |
| } |
| |
| void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { |
| return SetErrnoOnNull(instance.Calloc(nmemb, size, stack)); |
| } |
| |
| void *memprof_reallocarray(void *p, uptr nmemb, uptr size, |
| BufferedStackTrace *stack) { |
| if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { |
| errno = errno_ENOMEM; |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportReallocArrayOverflow(nmemb, size, stack); |
| } |
| return memprof_realloc(p, nmemb * size, stack); |
| } |
| |
| void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) { |
| if (!p) |
| return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); |
| if (size == 0) { |
| if (flags()->allocator_frees_and_returns_null_on_realloc_zero) { |
| instance.Deallocate(p, 0, 0, stack, FROM_MALLOC); |
| return nullptr; |
| } |
| // Allocate a size of 1 if we shouldn't free() on Realloc to 0 |
| size = 1; |
| } |
| return SetErrnoOnNull(instance.Reallocate(p, size, stack)); |
| } |
| |
| void *memprof_valloc(uptr size, BufferedStackTrace *stack) { |
| return SetErrnoOnNull( |
| instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC)); |
| } |
| |
| void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) { |
| uptr PageSize = GetPageSizeCached(); |
| if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { |
| errno = errno_ENOMEM; |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportPvallocOverflow(size, stack); |
| } |
| // pvalloc(0) should allocate one page. |
| size = size ? RoundUpTo(size, PageSize) : PageSize; |
| return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC)); |
| } |
| |
| void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, |
| AllocType alloc_type) { |
| if (UNLIKELY(!IsPowerOfTwo(alignment))) { |
| errno = errno_EINVAL; |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportInvalidAllocationAlignment(alignment, stack); |
| } |
| return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type)); |
| } |
| |
| void *memprof_aligned_alloc(uptr alignment, uptr size, |
| BufferedStackTrace *stack) { |
| if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { |
| errno = errno_EINVAL; |
| if (AllocatorMayReturnNull()) |
| return nullptr; |
| ReportInvalidAlignedAllocAlignment(size, alignment, stack); |
| } |
| return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC)); |
| } |
| |
| int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, |
| BufferedStackTrace *stack) { |
| if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { |
| if (AllocatorMayReturnNull()) |
| return errno_EINVAL; |
| ReportInvalidPosixMemalignAlignment(alignment, stack); |
| } |
| void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC); |
| if (UNLIKELY(!ptr)) |
| // OOM error is already taken care of by Allocate. |
| return errno_ENOMEM; |
| CHECK(IsAligned((uptr)ptr, alignment)); |
| *memptr = ptr; |
| return 0; |
| } |
| |
| uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { |
| if (!ptr) |
| return 0; |
| uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr)); |
| return usable_size; |
| } |
| |
| void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) { |
| instance.SetRssLimitExceeded(limit_exceeded); |
| } |
| |
| } // namespace __memprof |
| |
| // ---------------------- Interface ---------------- {{{1 |
| using namespace __memprof; |
| |
| #if !SANITIZER_SUPPORTS_WEAK_HOOKS |
| // Provide default (no-op) implementation of malloc hooks. |
| SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr, |
| uptr size) { |
| (void)ptr; |
| (void)size; |
| } |
| |
| SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) { |
| (void)ptr; |
| } |
| #endif |
| |
| uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } |
| |
| int __sanitizer_get_ownership(const void *p) { |
| return memprof_malloc_usable_size(p, 0, 0) != 0; |
| } |
| |
| uptr __sanitizer_get_allocated_size(const void *p) { |
| return memprof_malloc_usable_size(p, 0, 0); |
| } |
| |
| int __memprof_profile_dump() { |
| instance.FinishAndPrint(); |
| // In the future we may want to return non-zero if there are any errors |
| // detected during the dumping process. |
| return 0; |
| } |