OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
// This is an OS-independent* module whose purpose is tracking allocations and
// their call sites (stack traces). It is able to deal with hole punching
// (read: munmap). Also, it has low overhead and its presence in the system is
// barely noticeable, even if tracing *all* the processes.
| 9 // This module does NOT know how to deal with stack unwinding. The caller must |
| 10 // do that and pass the addresses of the unwound stack. |
| 11 // * (Modulo three lines for mutexes.) |
| 12 // |
| 13 // Exposed API: |
| 14 // void heap_profiler_init(HeapStats*); |
| 15 // void heap_profiler_alloc(addr, size, stack_frames, depth, flags); |
| 16 // void heap_profiler_free(addr, size); (size == 0 means free entire region). |
| 17 // |
| 18 // The profiling information is tracked into two data structures: |
| 19 // 1) A RB-Tree of non-overlapping virtual memory regions (VMAs) sorted by their |
| 20 // start addr. Each entry tracks the start-end addresses and points to the |
| 21 // stack trace which created that allocation (see below). |
| 22 // 2) A (hash) table of stack traces. In general the #allocations >> #call sites |
| 23 // which create those allocations. In order to avoid duplicating the latter, |
| 24 // they are stored distinctly in this hash table and used by reference. |
| 25 // |
| 26 // / Process virtual address space \ |
| 27 // +------+ +------+ +------+ |
| 28 // | VMA1 | | VMA2 | | VMA3 | <- VMAs (a RB-Tree underneath) |
| 29 // +------+ +------+ +------+ |
| 30 // Len: 12 Len: 4 Len: 4 |
| 31 // | | | stack_traces |
| 32 // | | | +-----------+--------------+ |
| 33 // | | | | Alloc tot | stack frames + |
| 34 // | | | +-----------+--------------+ |
| 35 // +------------|-------------+------------> | 16 | 0x1234 .... | |
| 36 // | +-----------+--------------+ |
| 37 // +--------------------------> | 4 | 0x5678 .... | |
| 38 // +-----------+--------------+ |
| 39 // (A hash-table underneath) |
| 40 // |
// Final note: the memory for both 1) and 2) entries is carved out from two
// static pools (i.e. stack_traces and vmas). The pools are treated essentially
// as an sbrk, and are kept compact by reusing freed elements (hence
// having a freelist for each of them).
| 45 // |
| 46 // All the internal (static) functions here assume that the |lock| is held. |
| 47 |
| 48 #include <assert.h> |
| 49 #include <string.h> |
| 50 |
| 51 // Platform-dependent mutex boilerplate. |
| 52 #if defined(__linux__) || defined(__ANDROID__) |
| 53 #include <pthread.h> |
| 54 #define DEFINE_MUTEX(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER |
| 55 #define LOCK_MUTEX(x) pthread_mutex_lock(&x) |
| 56 #define UNLOCK_MUTEX(x) pthread_mutex_unlock(&x) |
| 57 #else |
| 58 #error OS not supported. |
| 59 #endif |
| 60 |
| 61 #include "tools/android/heap_profiler/heap_profiler.h" |
| 62 |
| 63 |
// Guards all the static state below; every static helper assumes it is held.
static DEFINE_MUTEX(lock);

// |stats| contains the global tracking metadata and is the entry point which
// is read by the heap_dump tool. Set by heap_profiler_init(), cleared by
// heap_profiler_cleanup().
static HeapStats* stats;
| 69 |
// +---------------------------------------------------------------------------+
// + Stack traces hash-table                                                   +
// +---------------------------------------------------------------------------+
#define ST_ENTRIES_MAX (64 * 1024)
#define ST_HASHTABLE_BUCKETS (64 * 1024) /* Must be a power of 2. */

// Fixed pool the StacktraceEntry instances are carved out of (no malloc).
static StacktraceEntry stack_traces[ST_ENTRIES_MAX];
// Singly-linked (via |next|) list of pool elements released by
// free_stacktrace(), reused before growing the pool.
static StacktraceEntry* stack_traces_freelist;
// Hash-table buckets: each slot heads a chain of entries linked via |next|.
static StacktraceEntry* stack_traces_ht[ST_HASHTABLE_BUCKETS];
| 79 |
| 80 // Looks up a stack trace from the stack frames. Creates a new one if necessary. |
| 81 static StacktraceEntry* record_stacktrace(uintptr_t* frames, uint32_t depth) { |
| 82 if (depth == 0) |
| 83 return NULL; |
| 84 |
| 85 if (depth > HEAP_PROFILER_MAX_DEPTH) |
| 86 depth = HEAP_PROFILER_MAX_DEPTH; |
| 87 |
| 88 uint32_t i; |
| 89 uintptr_t hash = 0; |
| 90 for (i = 0; i < depth; ++i) |
| 91 hash = (hash << 1) ^ (frames[i]); |
| 92 const uint32_t slot = hash & (ST_HASHTABLE_BUCKETS - 1); |
| 93 StacktraceEntry* st = stack_traces_ht[slot]; |
| 94 |
| 95 // Look for an existing entry in the hash-table. |
| 96 const size_t frames_length = depth * sizeof(uintptr_t); |
| 97 while (st != NULL && st->hash != hash && |
| 98 memcmp(frames, st->frames, frames_length) != 0) { |
| 99 st = st->next; |
| 100 } |
| 101 |
| 102 // If not found, create a new one from the stack_traces array and add it to |
| 103 // the hash-table. |
| 104 if (st == NULL) { |
| 105 // Get a free element either from the freelist or from the pool. |
| 106 if (stack_traces_freelist != NULL) { |
| 107 st = stack_traces_freelist; |
| 108 stack_traces_freelist = stack_traces_freelist->next; |
| 109 } else if (stats->max_stack_traces < ST_ENTRIES_MAX) { |
| 110 st = &stack_traces[stats->max_stack_traces]; |
| 111 ++stats->max_stack_traces; |
| 112 } else { |
| 113 return NULL; |
| 114 } |
| 115 |
| 116 memset(st, 0, sizeof(*st)); |
| 117 memcpy(st->frames, frames, frames_length); |
| 118 st->hash = hash; |
| 119 st->next = stack_traces_ht[slot]; |
| 120 stack_traces_ht[slot] = st; |
| 121 ++stats->num_stack_traces; |
| 122 } |
| 123 |
| 124 return st; |
| 125 } |
| 126 |
| 127 // Frees up a stack trace and appends it to the corresponding freelist. |
| 128 static void free_stacktrace(StacktraceEntry* st) { |
| 129 assert(st->alloc_bytes == 0); |
| 130 const uint32_t slot = st->hash & (ST_HASHTABLE_BUCKETS - 1); |
| 131 |
| 132 // The expected load factor of the hash-table is very low. Frees should be |
| 133 // pretty rare. Hence don't bother with a doubly linked list, might cost more. |
| 134 StacktraceEntry** prev = &stack_traces_ht[slot]; |
| 135 while (*prev != st) |
| 136 prev = &((*prev)->next); |
| 137 |
| 138 // Remove from the hash-table bucket. |
| 139 assert(*prev == st); |
| 140 *prev = st->next; |
| 141 |
| 142 // Add to the freelist. |
| 143 st->next = stack_traces_freelist; |
| 144 stack_traces_freelist = st; |
| 145 --stats->num_stack_traces; |
| 146 } |
| 147 |
// +---------------------------------------------------------------------------+
// + VMAs RB-tree                                                              +
// +---------------------------------------------------------------------------+
#define VMA_ENTRIES_MAX (256 * 1024)

// Fixed pool the VMA entries are carved out of (no malloc).
static VMA vmas[VMA_ENTRIES_MAX];
// Singly-linked (via |next_free|) list of freed pool elements, reused first.
static VMA* vmas_freelist;
// RB-tree of live, non-overlapping VMAs keyed by their start address.
static RB_HEAD(HeapEntriesTree, VMA) vmas_tree = RB_INITIALIZER(&vmas_tree);
| 156 |
| 157 // Comparator used by the RB-Tree (mind the overflow, avoid arith on addresses). |
| 158 static int vmas_tree_cmp(VMA *e1, VMA *e2) { |
| 159 if (e1->start < e2->start) |
| 160 return -1; |
| 161 if (e1->start > e2->start) |
| 162 return 1; |
| 163 return 0; |
| 164 } |
| 165 |
| 166 RB_PROTOTYPE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp); |
| 167 RB_GENERATE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp); |
| 168 |
| 169 // Allocates a new VMA and inserts it in the tree. |
| 170 static VMA* insert_vma( |
| 171 uintptr_t start, uintptr_t end, StacktraceEntry* st, uint32_t flags) { |
| 172 VMA* vma = NULL; |
| 173 |
| 174 // First of all, get a free element either from the freelist or from the pool. |
| 175 if (vmas_freelist != NULL) { |
| 176 vma = vmas_freelist; |
| 177 vmas_freelist = vma->next_free; |
| 178 } else if (stats->max_allocs < VMA_ENTRIES_MAX) { |
| 179 vma = &vmas[stats->max_allocs]; |
| 180 ++stats->max_allocs; |
| 181 } else { |
| 182 return NULL; // OOM. |
| 183 } |
| 184 |
| 185 vma->start = start; |
| 186 vma->end = end; |
| 187 vma->st = st; |
| 188 vma->flags = flags; |
| 189 vma->next_free = NULL; |
| 190 RB_INSERT(HeapEntriesTree, &vmas_tree, vma); |
| 191 ++stats->num_allocs; |
| 192 return vma; |
| 193 } |
| 194 |
// Deletes all the vmas in the range [addr, addr+size[ dealing with partial
// frees and hole punching. Returns the OR of the |flags| of every VMA touched
// by the deletion (0 if none). Note that in the general case this function
// might need to deal with very unfortunate cases, as below:
//
// VMA tree @ begin: [ VMA 1 ]----[ VMA 2 ]-------[ VMA 3 ][ VMA 4 ]---[ VMA 5 ]
// Deletion range:            [xxxxxxxxxxxxxxxxxxxx]
// VMA tree @ end:   [ VMA 1 ]----[VMA2]----------------------[VMA4]---[ VMA 5 ]
// VMA3 has to be deleted and VMA 2,4 shrunk.
static uint32_t delete_vmas_in_range(void* addr, size_t size) {
  uintptr_t del_start = (uintptr_t) addr;
  uintptr_t del_end = del_start + size - 1;  // Inclusive end of the range.
  uint32_t flags = 0;

  VMA* vma = NULL;
  VMA* next_vma = RB_ROOT(&vmas_tree);

  // Lookup the first (by address) relevant VMA to initiate the deletion walk.
  // At the end of the loop next_vma is either:
  // - the closest VMA starting before (or exactly at) the start of the deletion
  //   range (i.e. addr == del_start).
  // - the first VMA inside the deletion range.
  // - the first VMA after the deletion range iff the range was already empty
  //   (in this case the next loop will just bail out doing nothing).
  // - NULL: iff the entire tree is empty (as above).
  while (next_vma != NULL) {
    vma = next_vma;
    if (vma->start > del_start) {
      next_vma = RB_LEFT(vma, rb_node);
    } else if (vma->end < del_start) {
      next_vma = RB_RIGHT(vma, rb_node);
    } else {  // vma->start <= del_start && vma->end >= del_start
      break;
    }
  }

  // Now scan the VMAs linearly (in-order) deleting chunks (or eventually the
  // whole VMAs) until passing the end of the deletion region.
  next_vma = vma;
  while (next_vma != NULL) {
    vma = next_vma;
    // Grab the successor now: |vma| might be removed from the tree below.
    next_vma = RB_NEXT(HeapEntriesTree, &vmas_tree, vma);

    if (size != 0) {
      // In the general case we stop once past the end of the deletion range.
      if (vma->start > del_end)
        break;

      // This deals with the case of the first VMA laying before the range.
      if (vma->end < del_start)
        continue;
    } else {
      // size == 0 is a special case. It means deleting only the vma which
      // starts exactly at |del_start| if any (for dealing with free(ptr)).
      if (vma->start > del_start)
        break;
      if (vma->start < del_start)
        continue;
      del_end = vma->end;
    }

    // Reached this point the VMA must overlap (partially or completely) with
    // the deletion range.
    assert(!(vma->start > del_end || vma->end < del_start));

    StacktraceEntry* st = vma->st;
    flags |= vma->flags;
    uintptr_t freed_bytes = 0;  // Bytes freed in this cycle.

    if (del_start <= vma->start) {
      if (del_end >= vma->end) {
        // Complete overlap. Delete full VMA. Note: the range might still
        // overlap with the next vmas.
        // Begin:       ------[vma.start      vma.end]-[next vma]
        // Del range:      [xxxxxxxxxxxxxxxxxxxxxxxxxxxx]
        // Result:      -----------------------------[next vma]
        // Note: at the next iteration we'll deal with the next vma.
        freed_bytes = vma->end - vma->start + 1;
        RB_REMOVE(HeapEntriesTree, &vmas_tree, vma);

        // Clean-up, so heap_dump can tell this is a free entry and skip it.
        vma->start = vma->end = 0;
        vma->st = NULL;

        // Put in the freelist.
        vma->next_free = vmas_freelist;
        vmas_freelist = vma;
        --stats->num_allocs;
      } else {
        // Partial overlap at beginning. Cut first part and shrink the vma.
        // Begin:       ------[vma.start      vma.end]-[next vma]
        // Del range:      [xxxxxx]
        // Result:      ------------[start    vma.end]-[next vma]
        freed_bytes = del_end - vma->start + 1;
        vma->start = del_end + 1;
        // No need to update the tree even if we changed the key. The keys are
        // still monotonic (because the ranges are guaranteed to not overlap).
      }
    } else {
      if (del_end >= vma->end) {
        // Partial overlap at end. Cut last part and shrink the vma left.
        // Begin:       ------[vma.start      vma.end]-[next vma]
        // Del range:                     [xxxxxx]
        // Result:      ------[vma.start vma.end]-[next vma]
        // Note: at the next iteration we'll deal with the next vma.
        freed_bytes = vma->end - del_start + 1;
        vma->end = del_start - 1;
      } else {
        // Hole punching. Requires creating an extra vma.
        // Begin:       ------[vma.start      vma.end]-[next vma]
        // Del range:              [xxx]
        // Result:      ------[ vma 1 ]-----[ vma 2 ]-[next vma]
        freed_bytes = del_end - del_start + 1;
        const uintptr_t old_end = vma->end;
        vma->end = del_start - 1;

        // In case of OOM, don't count the 2nd vma we failed to allocate.
        if (insert_vma(del_end + 1, old_end, st, vma->flags) == NULL)
          freed_bytes += (old_end - del_end);
      }
    }
    // Now update the StackTraceEntry the VMA was pointing to, eventually
    // freeing it up.
    assert(st->alloc_bytes >= freed_bytes);
    st->alloc_bytes -= freed_bytes;
    if (st->alloc_bytes == 0)
      free_stacktrace(st);
    stats->total_alloc_bytes -= freed_bytes;
  }
  return flags;
}
| 325 |
| 326 // +---------------------------------------------------------------------------+ |
| 327 // + Library entry points (refer to heap_profiler.h for API doc). + |
| 328 // +---------------------------------------------------------------------------+ |
| 329 void heap_profiler_free(void* addr, size_t size, uint32_t* old_flags) { |
| 330 assert(size == 0 || ((uintptr_t) addr + (size - 1)) >= (uintptr_t) addr); |
| 331 |
| 332 LOCK_MUTEX(lock); |
| 333 uint32_t flags = delete_vmas_in_range(addr, size); |
| 334 UNLOCK_MUTEX(lock); |
| 335 |
| 336 if (old_flags != NULL) |
| 337 *old_flags = flags; |
| 338 } |
| 339 |
| 340 void heap_profiler_alloc(void* addr, size_t size, uintptr_t* frames, |
| 341 uint32_t depth, uint32_t flags) { |
| 342 if (depth > HEAP_PROFILER_MAX_DEPTH) |
| 343 depth = HEAP_PROFILER_MAX_DEPTH; |
| 344 |
| 345 if (size == 0) // Apps calling malloc(0), sometimes it happens. |
| 346 return; |
| 347 |
| 348 const uintptr_t start = (uintptr_t) addr; |
| 349 const uintptr_t end = start + (size - 1); |
| 350 assert(start <= end); |
| 351 |
| 352 LOCK_MUTEX(lock); |
| 353 |
| 354 delete_vmas_in_range(addr, size); |
| 355 |
| 356 StacktraceEntry* st = record_stacktrace(frames, depth); |
| 357 if (st != NULL) { |
| 358 VMA* vma = insert_vma(start, end, st, flags); |
| 359 if (vma != NULL) { |
| 360 st->alloc_bytes += size; |
| 361 stats->total_alloc_bytes += size; |
| 362 } |
| 363 } |
| 364 |
| 365 UNLOCK_MUTEX(lock); |
| 366 } |
| 367 |
| 368 void heap_profiler_init(HeapStats* heap_stats) { |
| 369 LOCK_MUTEX(lock); |
| 370 |
| 371 assert(stats == NULL); |
| 372 stats = heap_stats; |
| 373 memset(stats, 0, sizeof(HeapStats)); |
| 374 stats->magic_start = HEAP_PROFILER_MAGIC_MARKER; |
| 375 stats->allocs = &vmas[0]; |
| 376 stats->stack_traces = &stack_traces[0]; |
| 377 |
| 378 UNLOCK_MUTEX(lock); |
| 379 } |
| 380 |
| 381 void heap_profiler_cleanup(void) { |
| 382 LOCK_MUTEX(lock); |
| 383 |
| 384 assert(stats != NULL); |
| 385 memset(stack_traces, 0, sizeof(StacktraceEntry) * stats->max_stack_traces); |
| 386 memset(stack_traces_ht, 0, sizeof(stack_traces_ht)); |
| 387 stack_traces_freelist = NULL; |
| 388 |
| 389 memset(vmas, 0, sizeof(VMA) * stats->max_allocs); |
| 390 vmas_freelist = NULL; |
| 391 RB_INIT(&vmas_tree); |
| 392 |
| 393 stats = NULL; |
| 394 |
| 395 UNLOCK_MUTEX(lock); |
| 396 } |
OLD | NEW |