blob: 06fa163568e9131b92af9e9e1cf6a571402b4590 [file] [log] [blame]
//===-- head_find.c ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file compiles into a dylib and can be used on darwin to find data that
// is contained in active malloc blocks. To use this make the project, then
// load the shared library in a debug session while you are stopped:
//
// (lldb) process load /path/to/libheap.dylib
//
// Now you can use the "find_pointer_in_heap" and "find_cstring_in_heap"
// functions in the expression parser.
//
// This will grep everything in all active allocation blocks and print and
// malloc blocks that contain the pointer 0x112233000000:
//
// (lldb) expression find_pointer_in_heap (0x112233000000)
//
// This will grep everything in all active allocation blocks and print and
// malloc blocks that contain the C string "hello" (as a substring, no
// NULL termination included):
//
// (lldb) expression find_cstring_in_heap ("hello")
//
// The results will be printed to the STDOUT of the inferior program. The
// return value of the "find_pointer_in_heap" function is the number of
// pointer references that were found. A quick example shows
//
// (lldb) expr find_pointer_in_heap(0x0000000104000410)
// (uint32_t) $5 = 0x00000002
// 0x104000740: 0x0000000104000410 found in malloc block 0x104000730 + 16 (malloc_size = 48)
// 0x100820060: 0x0000000104000410 found in malloc block 0x100820000 + 96 (malloc_size = 4096)
//
// From the above output we see that 0x104000410 was found in the malloc block
// at 0x104000730 and 0x100820000. If we want to see what these blocks are, we
// can display the memory for this block using the "address" ("A" for short)
// format. The address format shows pointers, and if those pointers point to
// objects that have symbols or know data contents, it will display information
// about the pointers:
//
// (lldb) memory read --format address --count 1 0x104000730
// 0x104000730: 0x0000000100002460 (void *)0x0000000100002488: MyString
//
// We can see that the first block is a "MyString" object that contains our
// pointer value at offset 16.
//
// Looking at the next pointers, are a bit more tricky:
// (lldb) memory read -fA 0x100820000 -c1
// 0x100820000: 0x4f545541a1a1a1a1
// (lldb) memory read 0x100820000
// 0x100820000: a1 a1 a1 a1 41 55 54 4f 52 45 4c 45 41 53 45 21 ....AUTORELEASE!
// 0x100820010: 78 00 82 00 01 00 00 00 60 f9 e8 75 ff 7f 00 00 x.......`..u....
//
// This is an objective C auto release pool object that contains our pointer.
// C++ classes will show up if they are virtual as something like:
// (lldb) memory read --format address --count 1 0x104008000
// 0x104008000: 0x109008000 vtable for lldb_private::Process
//
// This is a clue that the 0x104008000 is a "lldb_private::Process *".
//===----------------------------------------------------------------------===//
#include <assert.h>
#include <ctype.h>
#include <mach/mach.h>
#include <malloc/malloc.h>
#include <stack_logging.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
typedef void range_callback_t (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size);
typedef void zone_callback_t (void *info, const malloc_zone_t *zone);
struct range_callback_info_t
{
zone_callback_t *zone_callback;
range_callback_t *range_callback;
void *baton;
};
enum data_type_t
{
eDataTypeAddress,
eDataTypeContainsData
};
struct aligned_data_t
{
const uint8_t *buffer;
uint32_t size;
uint32_t align;
};
struct range_contains_data_callback_info_t
{
data_type_t type;
const void *lookup_addr;
union
{
uintptr_t addr;
aligned_data_t data;
};
uint32_t match_count;
bool done;
};
struct malloc_match
{
void *addr;
intptr_t size;
intptr_t offset;
};
std::vector<malloc_match> g_matches;
const void *g_lookup_addr = 0;
//----------------------------------------------------------------------
// task_peek
//
// Reads memory from this tasks address space. This callback is needed
// by the code that iterates through all of the malloc blocks to read
// the memory in this process.
//----------------------------------------------------------------------
static kern_return_t
task_peek (task_t task, vm_address_t remote_address, vm_size_t size, void **local_memory)
{
*local_memory = (void*) remote_address;
return KERN_SUCCESS;
}
static const void
foreach_zone_in_this_process (range_callback_info_t *info)
{
if (info == NULL || info->zone_callback == NULL)
return;
vm_address_t *zones = NULL;
unsigned int num_zones = 0;
kern_return_t err = malloc_get_all_zones (0, task_peek, &zones, &num_zones);
if (KERN_SUCCESS == err)
{
for (unsigned int i=0; i<num_zones; ++i)
{
info->zone_callback (info, (const malloc_zone_t *)zones[i]);
}
}
}
//----------------------------------------------------------------------
// dump_malloc_block_callback
//
// A simple callback that will dump each malloc block and all available
// info from the enumeration callback perpective.
//----------------------------------------------------------------------
static void
dump_malloc_block_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
{
printf ("task = 0x%4.4x: baton = %p, type = %u, ptr_addr = 0x%llx + 0x%llu\n", task, baton, type, ptr_addr, ptr_size);
}
static void
ranges_callback (task_t task, void *baton, unsigned type, vm_range_t *ptrs, unsigned count)
{
range_callback_info_t *info = (range_callback_info_t *)baton;
while(count--) {
info->range_callback (task, info->baton, type, ptrs->address, ptrs->size);
ptrs++;
}
}
static void
enumerate_range_in_zone (void *baton, const malloc_zone_t *zone)
{
range_callback_info_t *info = (range_callback_info_t *)baton;
if (zone && zone->introspect)
zone->introspect->enumerator (mach_task_self(),
info,
MALLOC_PTR_IN_USE_RANGE_TYPE,
(vm_address_t)zone,
task_peek,
ranges_callback);
}
static void
range_info_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
{
const uint64_t end_addr = ptr_addr + ptr_size;
range_contains_data_callback_info_t *info = (range_contains_data_callback_info_t *)baton;
switch (info->type)
{
case eDataTypeAddress:
if (ptr_addr <= info->addr && info->addr < end_addr)
{
++info->match_count;
malloc_match match = { (void *)ptr_addr, ptr_size, info->addr - ptr_addr };
g_matches.push_back(match);
}
break;
case eDataTypeContainsData:
{
const uint32_t size = info->data.size;
if (size < ptr_size) // Make sure this block can contain this data
{
uint8_t *ptr_data = NULL;
if (task_peek (task, ptr_addr, ptr_size, (void **)&ptr_data) == KERN_SUCCESS)
{
const void *buffer = info->data.buffer;
assert (ptr_data);
const uint32_t align = info->data.align;
for (uint64_t addr = ptr_addr;
addr < end_addr && ((end_addr - addr) >= size);
addr += align, ptr_data += align)
{
if (memcmp (buffer, ptr_data, size) == 0)
{
++info->match_count;
malloc_match match = { (void *)ptr_addr, ptr_size, addr - ptr_addr };
g_matches.push_back(match);
}
}
}
else
{
printf ("0x%llx: error: couldn't read %llu bytes\n", ptr_addr, ptr_size);
}
}
}
break;
}
}
//----------------------------------------------------------------------
// find_pointer_in_heap
//
// Finds a pointer value inside one or more currently valid malloc
// blocks.
//----------------------------------------------------------------------
malloc_match *
find_pointer_in_heap (const void * addr)
{
g_matches.clear();
// Setup "info" to look for a malloc block that contains data
// that is the a pointer
range_contains_data_callback_info_t data_info;
data_info.type = eDataTypeContainsData; // Check each block for data
g_lookup_addr = addr;
data_info.data.buffer = (uint8_t *)&addr; // What data? The pointer value passed in
data_info.data.size = sizeof(addr); // How many bytes? The byte size of a pointer
data_info.data.align = sizeof(addr); // Align to a pointer byte size
data_info.match_count = 0; // Initialize the match count to zero
data_info.done = false; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
foreach_zone_in_this_process (&info);
if (g_matches.empty())
return NULL;
malloc_match match = { NULL, 0, 0 };
g_matches.push_back(match);
return g_matches.data();
}
//----------------------------------------------------------------------
// find_cstring_in_heap
//
// Finds a C string inside one or more currently valid malloc blocks.
//----------------------------------------------------------------------
malloc_match *
find_cstring_in_heap (const char *s)
{
g_matches.clear();
if (s == NULL || s[0] == '\0')
{
printf ("error: invalid argument (empty cstring)\n");
return NULL;
}
// Setup "info" to look for a malloc block that contains data
// that is the C string passed in aligned on a 1 byte boundary
range_contains_data_callback_info_t data_info;
data_info.type = eDataTypeContainsData; // Check each block for data
g_lookup_addr = s; // If an expression was used, then fill in the resolved address we are looking up
data_info.data.buffer = (uint8_t *)s; // What data? The C string passed in
data_info.data.size = strlen(s); // How many bytes? The length of the C string
data_info.data.align = 1; // Data doesn't need to be aligned, so set the alignment to 1
data_info.match_count = 0; // Initialize the match count to zero
data_info.done = false; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
foreach_zone_in_this_process (&info);
if (g_matches.empty())
return NULL;
malloc_match match = { NULL, 0, 0 };
g_matches.push_back(match);
return g_matches.data();
}
//----------------------------------------------------------------------
// find_block_for_address
//
// Find the malloc block that whose address range contains "addr".
//----------------------------------------------------------------------
malloc_match *
find_block_for_address (const void *addr)
{
g_matches.clear();
// Setup "info" to look for a malloc block that contains data
// that is the C string passed in aligned on a 1 byte boundary
range_contains_data_callback_info_t data_info;
g_lookup_addr = addr; // If an expression was used, then fill in the resolved address we are looking up
data_info.type = eDataTypeAddress; // Check each block to see if the block contains the address passed in
data_info.addr = (uintptr_t)addr; // What data? The C string passed in
data_info.match_count = 0; // Initialize the match count to zero
data_info.done = false; // Set done to false so searching doesn't stop
range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
foreach_zone_in_this_process (&info);
if (g_matches.empty())
return NULL;
malloc_match match = { NULL, 0, 0 };
g_matches.push_back(match);
return g_matches.data();
}