Chromium Code Reviews

Side by Side Diff: tools/android/heap_profiler/heap_profiler.c

Issue 323893002: [Android] Introduce libheap_profiler for memory profiling. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address pasko@#13 + rebase against move trees in 323873004 Created 6 years, 6 months ago
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This is an OS-independent* module whose purpose is to track allocations
6 // and their call sites (stack traces). It is able to deal with hole punching
7 // (read: munmap). Also, it is very fast and its presence in the system is
pasko 2014/06/23 20:09:23 nit (sorry, I'm picky in inappropriate places quit
Primiano Tucci (use gerrit) 2014/06/24 08:43:32 Done.
8 // barely noticeable, even when tracing *all* the processes.
9 // This module does NOT know how to deal with stack unwinding. The caller must
10 // do that and pass the addresses of the unwound stack.
11 // * (Modulo three lines for mutexes.)
12 //
13 // Exposed API:
14 // void heap_profiler_init(HeapStats*);
15 // void heap_profiler_alloc(addr, size, stack_frames, depth, flags);
16 // void heap_profiler_free(addr, size, old_flags); (size == 0 frees the entire region).
17 //
18 // The profiling information is tracked into two data structures:
19 // 1) An RB-Tree of non-overlapping virtual memory regions (VMAs) sorted by their
pasko 2014/06/23 20:09:23 nit: s/non overlapping/non-overlapping/
Primiano Tucci (use gerrit) 2014/06/24 08:43:33 Done.
20 // start addr. Each entry tracks the start-end addresses and points to the
21 // stack trace which created that allocation (see below).
22 // 2) A (hash) table of stack traces. In general the #allocations >> #call sites
23 // which create those allocations. In order to avoid duplicating the latter,
25 // they are stored only once in this hash table and referenced by the VMAs.
25 //
26 // / Process virtual address space \
27 // +------+ +------+ +------+
28 // | VMA1 | | VMA2 | | VMA3 | <- VMAs (a RB-Tree underneath)
29 // +------+ +------+ +------+
30 // Len: 12 Len: 4 Len: 4
31 // | | | stack_traces
32 // | | | +-----------+--------------+
33 // | | | | Alloc tot | stack frames +
34 // | | | +-----------+--------------+
35 // +------------|-------------+------------> | 16 | 0x1234 .... |
36 // | +-----------+--------------+
37 // +--------------------------> | 4 | 0x5678 .... |
38 // +-----------+--------------+
39 // (A hash-table underneath)
40 //
41 // Final note: the memory for both 1) and 2) entries is carved out from two
42 // static pools (i.e. stack_traces and vmas). The pools are essentially
43 // treated like sbrk arenas and are kept compact by reusing freed elements
44 // (hence the freelist for each of them).
45 //
46 // All the internal (static) functions here assume that the |lock| is held.
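A minimal usage sketch of the exposed API described above (illustrative only, not part of the patch; the frame addresses are placeholders and the caller is expected to supply its own unwinder):

  #include <stdlib.h>
  #include "tools/android/heap_profiler/heap_profiler.h"

  static HeapStats g_heap_stats;

  void example(void) {
    heap_profiler_init(&g_heap_stats);

    // Frames come from the caller's unwinder; placeholder values here.
    uintptr_t frames[] = {0x12345678, 0x9abcdef0};
    void* buf = malloc(4096);
    heap_profiler_alloc(buf, 4096, frames, 2, 0 /* flags */);

    free(buf);
    heap_profiler_free(buf, 0 /* 0 == free the entire region */, NULL);

    heap_profiler_cleanup();
  }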
47
48 #include <assert.h>
49 #include <string.h>
50
51 // Platform-dependent mutex boilerplate.
52 #if defined(__linux__) || defined(__ANDROID__)
53 #include <pthread.h>
54 #define DEFINE_MUTEX(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
55 #define LOCK_MUTEX(x) pthread_mutex_lock(&x)
56 #define UNLOCK_MUTEX(x) pthread_mutex_unlock(&x)
57 #else
58 #error OS not supported.
59 #endif
60
61 #include "tools/android/heap_profiler/heap_profiler.h"
62
63
64 static DEFINE_MUTEX(lock);
65
66 // |stats| contains the global tracking metadata and is the entry point which
67 // is read by the heap_dump tool.
68 static HeapStats* stats;
69
70 // +---------------------------------------------------------------------------+
71 // + Stack traces hash-table +
72 // +---------------------------------------------------------------------------+
73 #define ST_ENTRIES_MAX (64 * 1024)
74 #define ST_HASHTABLE_BUCKETS (64 * 1024) /* Must be a power of 2. */
75
76 static StacktraceEntry stack_traces[ST_ENTRIES_MAX];
77 static StacktraceEntry* stack_traces_freelist;
78 static StacktraceEntry* stack_traces_ht[ST_HASHTABLE_BUCKETS];
79
80 // Looks up a stack trace from the stack frames. Creates a new one if necessary.
81 static StacktraceEntry* record_stacktrace(uintptr_t* frames, uint32_t depth) {
82 if (depth == 0)
83 return NULL;
84
85 if (depth > HEAP_PROFILER_MAX_DEPTH)
86 depth = HEAP_PROFILER_MAX_DEPTH;
87
88 uint32_t i;
89 uintptr_t hash = 0;
90 for (i = 0; i < depth; ++i)
91 hash = (hash << 1) ^ (frames[i]);
92 const uint32_t slot = hash & (ST_HASHTABLE_BUCKETS - 1);
93 StacktraceEntry* st = stack_traces_ht[slot];
94
95 // Look for an existing entry in the hash-table.
96 const size_t frames_length = depth * sizeof(uintptr_t);
97 while (st != NULL && (st->hash != hash ||
98 memcmp(frames, st->frames, frames_length) != 0)) {
99 st = st->next;
100 }
101
102 // If none is found, create a new one from the stack_traces array and add it
pasko 2014/06/23 20:09:23 None is found
Primiano Tucci (use gerrit) 2014/06/24 08:43:32 Actually that was an "If"
103 // to the hash-table.
104 if (st == NULL) {
105 // Get a free element either from the freelist or from the pool.
106 if (stack_traces_freelist != NULL) {
107 st = stack_traces_freelist;
108 stack_traces_freelist = stack_traces_freelist->next;
109 } else if (stats->max_stack_traces < ST_ENTRIES_MAX) {
110 st = &stack_traces[stats->max_stack_traces];
111 ++stats->max_stack_traces;
112 } else {
113 return NULL;
114 }
115
116 memset(st, 0, sizeof(*st));
117 memcpy(st->frames, frames, frames_length);
118 st->hash = hash;
119 st->next = stack_traces_ht[slot];
120 stack_traces_ht[slot] = st;
121 ++stats->num_stack_traces;
122 }
123
124 return st;
125 }
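For illustration of the deduplication this buys (hypothetical frame values, not from the patch):

  uintptr_t frames[2] = {0x1111, 0x2222};
  StacktraceEntry* a = record_stacktrace(frames, 2);
  StacktraceEntry* b = record_stacktrace(frames, 2);
  // a == b: the second call finds the entry already hashed in stack_traces_ht,
  // so stats->num_stack_traces grows only once. The caller (heap_profiler_alloc)
  // then accumulates alloc_bytes on the shared entry.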
126
127 // Frees up a stack trace and appends it to the corresponding freelist.
128 static void free_stacktrace(StacktraceEntry* st) {
129 assert(st->alloc_bytes == 0);
130 const uint32_t slot = st->hash & (ST_HASHTABLE_BUCKETS - 1);
131
132 // The expected load factor of the hash-table is very low. Frees should be
133 // pretty rare. Hence don't bother with a doubly-linked list; it might cost more.
134 StacktraceEntry** prev = &stack_traces_ht[slot];
135 while (*prev != st)
136 prev = &((*prev)->next);
137
138 // Remove from the hash-table bucket.
139 assert(*prev == st);
140 *prev = st->next;
141
142 // Add to the freelist.
143 st->next = stack_traces_freelist;
144 stack_traces_freelist = st;
145 --stats->num_stack_traces;
146 }
147
148 // +---------------------------------------------------------------------------+
149 // + VMAs RB-tree +
150 // +---------------------------------------------------------------------------+
151 #define VMA_ENTRIES_MAX (256 * 1024)
152
153 static VMA vmas[VMA_ENTRIES_MAX];
154 static VMA* vmas_freelist;
155 static RB_HEAD(HeapEntriesTree, VMA) vmas_tree = RB_INITIALIZER(&vmas_tree);
156
157 // Comparator used by the RB-Tree (mind overflow: avoid arithmetic on addresses).
158 static int vmas_tree_cmp(VMA *e1, VMA *e2) {
159 if (e1->start < e2->start)
160 return -1;
161 if (e1->start > e2->start)
162 return 1;
163 return 0;
164 }
165
166 RB_PROTOTYPE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp);
167 RB_GENERATE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp);
168
169 // Allocates a new VMA and inserts it in the tree.
170 static VMA* insert_vma(
171 uintptr_t start, uintptr_t end, StacktraceEntry* st, uint32_t flags) {
172 VMA* vma = NULL;
173
174 // First of all, get a free element either from the freelist or from the pool.
175 if (vmas_freelist != NULL) {
176 vma = vmas_freelist;
177 vmas_freelist = vma->next_free;
178 } else if (stats->max_allocs < VMA_ENTRIES_MAX) {
179 vma = &vmas[stats->max_allocs];
180 ++stats->max_allocs;
181 } else {
182 return NULL; // OOM.
183 }
184
185 vma->start = start;
186 vma->end = end;
187 vma->st = st;
188 vma->flags = flags;
189 vma->next_free = NULL;
190 RB_INSERT(HeapEntriesTree, &vmas_tree, vma);
191 ++stats->num_allocs;
192 return vma;
193 }
194
195 // Deletes all the VMAs in the range [addr, addr+size), dealing with partial
196 // frees and hole punching. Note that in the general case this function might
197 // need to deal with very unfortunate cases, as below:
198 //
199 // VMA tree @ begin: [ VMA 1 ]----[ VMA 2 ]-------[ VMA 3 ][ VMA 4 ]---[ VMA 5 ]
200 // Deletion range: [xxxxxxxxxxxxxxxxxxxx]
201 // VMA tree @ end: [ VMA 1 ]----[VMA2]----------------------[VMA4]---[ VMA 5 ]
202 // VMA 3 has to be deleted and VMAs 2 and 4 shrunk.
203 static uint32_t delete_vmas_in_range(void* addr, size_t size) {
204 uintptr_t del_start = (uintptr_t) addr;
205 uintptr_t del_end = del_start + size - 1;
206 uint32_t flags = 0;
207
208 VMA* vma = NULL;
209 VMA* next_vma = RB_ROOT(&vmas_tree);
210
211 // Look up the first (by address) relevant VMA to initiate the deletion walk.
212 // At the end of the loop next_vma is either:
213 // - the closest VMA starting before (or exactly at) the start of the deletion
214 // range (i.e. addr == del_start).
215 // - the first VMA inside the deletion range.
216 // - the first VMA after the deletion range iff the range was already empty
217 // (in this case the next loop will just bail out doing nothing).
218 // - NULL: iff the entire tree is empty (as above).
219 while (next_vma != NULL) {
220 vma = next_vma;
221 if (vma->start > del_start) {
222 next_vma = RB_LEFT(vma, rb_node);
223 } else if (vma->end < del_start) {
224 next_vma = RB_RIGHT(vma, rb_node);
225 } else { // vma->start <= del_start && vma->end >= del_start
226 break;
227 }
228 }
229
230 // Now scan the VMAs linearly, deleting chunks (or possibly whole VMAs)
231 // until passing the end of the deletion range.
232 next_vma = vma;
233 while (next_vma != NULL) {
234 vma = next_vma;
235 next_vma = RB_NEXT(HeapEntriesTree, &vmas_tree, vma);
236
237 if (size != 0) {
238 // In the general case we stop past the end of the deletion range.
239 if (vma->start > del_end)
240 break;
241
242 // This deals with the case of the first VMA lying before the range.
243 if (vma->end < del_start)
244 continue;
245 } else {
246 // size == 0 is a special case. It means deleting only the VMA which
247 // starts exactly at |del_start|, if any (for dealing with free(ptr)).
pasko 2014/06/23 20:09:23 nit: s/@/at/ please
Primiano Tucci (use gerrit) 2014/06/24 08:43:33 Done.
248 if (vma->start > del_start)
249 break;
250 if (vma->start < del_start)
251 continue;
252 del_end = vma->end;
253 }
254
255 // At this point the VMA must overlap (partially or completely) with
256 // the deletion range.
257 assert(!(vma->start > del_end || vma->end < del_start));
258
259 StacktraceEntry* st = vma->st;
260 flags |= vma->flags;
261 uintptr_t freed_bytes = 0; // Bytes freed in this cycle.
262
263 if (del_start <= vma->start) {
264 if (del_end >= vma->end) {
265 // Complete overlap. Delete full VMA. Note: the range might still
266 // overlap with the next vmas.
267 // Begin: ------[vma.start vma.end]-[next vma]
268 // Del range: [xxxxxxxxxxxxxxxxxxxxxxxxxxxx]
269 // Result: -----------------------------[next vma]
270 // Note: at the next iteration we'll deal with the next vma.
271 freed_bytes = vma->end - vma->start + 1;
272 RB_REMOVE(HeapEntriesTree, &vmas_tree, vma);
273
274 // Clean-up, so heap_dump can tell this is a free entry and skip it.
275 vma->start = vma->end = 0;
276 vma->st = NULL;
277
278 // Put in the freelist.
279 vma->next_free = vmas_freelist;
280 vmas_freelist = vma;
281 --stats->num_allocs;
282 } else {
283 // Partial overlap at beginning. Cut first part and shrink the vma.
284 // Begin: ------[vma.start vma.end]-[next vma]
285 // Del range: [xxxxxx]
286 // Result: ------------[start vma.end]-[next vma]
287 freed_bytes = del_end - vma->start + 1;
288 vma->start = del_end + 1;
289 // No need to update the tree even if we changed the key. The keys are
290 // still monotonic (because the ranges are guaranteed to not overlap).
291 }
292 } else {
293 if (del_end >= vma->end) {
294 // Partial overlap at end. Cut last part and shrink the vma left.
295 // Begin: ------[vma.start vma.end]-[next vma]
296 // Del range: [xxxxxx]
297 // Result: ------[vma.start vma.end]-[next vma]
298 // Note: at the next iteration we'll deal with the next vma.
299 freed_bytes = vma->end - del_start + 1;
300 vma->end = del_start - 1;
301 } else {
302 // Hole punching. Requires creating an extra vma.
303 // Begin: ------[vma.start vma.end]-[next vma]
304 // Del range: [xxx]
305 // Result: ------[ vma 1 ]-----[ vma 2 ]-[next vma]
306 freed_bytes = del_end - del_start + 1;
307 const uintptr_t old_end = vma->end;
308 vma->end = del_start - 1;
309 insert_vma(del_end + 1, old_end, st, vma->flags);
pasko 2014/06/23 20:09:23 we may OOM here, the worst consequence seems to be
Primiano Tucci (use gerrit) 2014/06/24 08:43:33 Hmm which assert? This shouldn't hit asserts in ca
pasko 2014/06/24 13:21:45 I thought it would assert on the line 257 when we
Primiano Tucci (use gerrit) 2014/06/24 15:10:56 Hmm no, as a general principle we never *barf* if
310 }
311 }
312 // Now update the StacktraceEntry the VMA was pointing to, possibly
313 // freeing it up.
314 assert(st->alloc_bytes >= freed_bytes);
315 st->alloc_bytes -= freed_bytes;
316 if (st->alloc_bytes == 0)
317 free_stacktrace(st);
318 stats->total_alloc_bytes -= freed_bytes;
319 }
320 return flags;
321 }
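A concrete walk-through of the hole-punching branch above (illustrative addresses, not from the patch): suppose a single 4 KiB allocation is tracked as the VMA [0x1000, 0x1FFF] and the middle 1 KiB is freed:

  uintptr_t frames[1] = {0xbeef0000};  // Placeholder unwound frame.
  heap_profiler_alloc((void*)(uintptr_t) 0x1000, 4096, frames, 1, 0);  // VMA [0x1000, 0x1FFF].
  heap_profiler_free((void*)(uintptr_t) 0x1400, 1024, NULL);           // Punch a 1 KiB hole.

  // delete_vmas_in_range(0x1400, 1024) computes del_end == 0x17FF, shrinks the
  // existing VMA to [0x1000, 0x13FF], inserts a new VMA [0x1800, 0x1FFF] via
  // insert_vma(), and subtracts 0x17FF - 0x1400 + 1 == 1024 bytes from both the
  // stack trace's alloc_bytes and stats->total_alloc_bytes.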
322
323 // +---------------------------------------------------------------------------+
324 // + Library entry points (refer to heap_profiler.h for API doc). +
325 // +---------------------------------------------------------------------------+
326 void heap_profiler_free(void* addr, size_t size, uint32_t* old_flags) {
327 assert(size == 0 || ((uintptr_t) addr + (size - 1)) >= (uintptr_t) addr);
328
329 LOCK_MUTEX(lock);
330 uint32_t flags = delete_vmas_in_range(addr, size);
331 UNLOCK_MUTEX(lock);
332
333 if (old_flags != NULL)
334 *old_flags = flags;
335 }
336
337 void heap_profiler_alloc(void* addr, size_t size, uintptr_t* frames,
338 uint32_t depth, uint32_t flags) {
339 if (depth > HEAP_PROFILER_MAX_DEPTH)
340 depth = HEAP_PROFILER_MAX_DEPTH;
341
342 if (size == 0) // Apps do sometimes call malloc(0); nothing to track.
343 return;
344
345 const uintptr_t start = (uintptr_t) addr;
346 const uintptr_t end = start + (size - 1);
347 assert(start <= end);
348
349 LOCK_MUTEX(lock);
350
351 delete_vmas_in_range(addr, size);
pasko 2014/06/23 20:09:23 ugh, having to remove ranges is counter-intuitive
Primiano Tucci (use gerrit) 2014/06/24 08:43:33 This is to be paranoid in the case we miss some fr
pasko 2014/06/24 13:21:45 I would argue that it can be actionable. If the me
Primiano Tucci (use gerrit) 2014/06/24 15:10:56 Really? You stop using a device just because you k
352
353 StacktraceEntry* st = record_stacktrace(frames, depth);
354 if (st != NULL) {
355 VMA* vma = insert_vma(start, end, st, flags);
356 if (vma != NULL) {
357 st->alloc_bytes += size;
358 stats->total_alloc_bytes += size;
359 }
360 }
361
362 UNLOCK_MUTEX(lock);
363 }
364
365 void heap_profiler_init(HeapStats* heap_stats) {
366 LOCK_MUTEX(lock);
367
368 assert(stats == NULL);
369 stats = heap_stats;
370 memset(stats, 0, sizeof(HeapStats));
pasko 2014/06/23 20:09:23 isn't this memory mapped from /dev/zero?
Primiano Tucci (use gerrit) 2014/06/24 08:43:33 This is essentially for the tests, which init/clea
371 stats->magic_start = HEAP_PROFILER_MAGIC_MARKER;
372 stats->allocs = &vmas[0];
373 stats->stack_traces = &stack_traces[0];
374
375 UNLOCK_MUTEX(lock);
376 }
377
378 void heap_profiler_cleanup(void) {
379 LOCK_MUTEX(lock);
380
381 assert(stats != NULL);
382 memset(stack_traces, 0, sizeof(StacktraceEntry) * stats->max_stack_traces);
383 memset(stack_traces_ht, 0, sizeof(stack_traces_ht));
384 stack_traces_freelist = NULL;
385
386 memset(vmas, 0, sizeof(VMA) * stats->max_allocs);
387 vmas_freelist = NULL;
388 RB_INIT(&vmas_tree);
389
390 stats = NULL;
391
392 UNLOCK_MUTEX(lock);
393 }
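A sketch of the init/cleanup pairing, in the spirit of the reviewer note above that the zeroing exists for tests which init and cleanup repeatedly (hypothetical test body, not part of the patch):

  #include <assert.h>
  #include "tools/android/heap_profiler/heap_profiler.h"

  static HeapStats test_stats;

  static void test_init_cleanup(void) {
    heap_profiler_init(&test_stats);
    assert(test_stats.magic_start == HEAP_PROFILER_MAGIC_MARKER);
    assert(test_stats.total_alloc_bytes == 0);
    // ... exercise heap_profiler_alloc() / heap_profiler_free() here ...
    heap_profiler_cleanup();  // Resets the static pools; init can run again.
    heap_profiler_init(&test_stats);
    heap_profiler_cleanup();
  }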