OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This is a OS-independent* module which purpose is tracking allocations and | |
6 // their call sites (stack traces). It is able to deal with hole punching | |
7 // (read: munmap). Also, it is very fast and its presence in the system is | |
pasko
2014/06/23 20:09:23
nit (sorry, I'm picky in inappropriate places quit
Primiano Tucci (use gerrit)
2014/06/24 08:43:32
Done.
| |
8 // barely noticeable, even if tracing *all* the processes. | |
9 // This module does NOT know how to deal with stack unwinding. The caller must | |
10 // do that and pass the addresses of the unwound stack. | |
11 // * (Modulo three lines for mutexes.) | |
12 // | |
13 // Exposed API: | |
14 // void heap_profiler_init(HeapStats*); | |
15 // void heap_profiler_alloc(addr, size, stack_frames, depth, flags); | |
16 // void heap_profiler_free(addr, size); (size == 0 means free entire region). | |
17 // | |
18 // The profiling information is tracked into two data structures: | |
19 // 1) A RB-Tree of non-overlapping virtual memory regions (VMAs) sorted by their | |
pasko
2014/06/23 20:09:23
nit: s/non overlapping/non-overlapping/
Primiano Tucci (use gerrit)
2014/06/24 08:43:33
Done.
| |
20 // start addr. Each entry tracks the start-end addresses and points to the | |
21 // stack trace which created that allocation (see below). | |
22 // 2) A (hash) table of stack traces. In general the #allocations >> #call sites | |
23 // which create those allocations. In order to avoid duplicating the latter, | |
24 // they are stored distinctly in this hash table and used by reference. | |
25 // | |
26 // / Process virtual address space \ | |
27 // +------+ +------+ +------+ | |
28 // | VMA1 | | VMA2 | | VMA3 | <- VMAs (a RB-Tree underneath) | |
29 // +------+ +------+ +------+ | |
30 // Len: 12 Len: 4 Len: 4 | |
31 // | | | stack_traces | |
32 // | | | +-----------+--------------+ | |
33 // | | | | Alloc tot | stack frames + | |
34 // | | | +-----------+--------------+ | |
35 // +------------|-------------+------------> | 16 | 0x1234 .... | | |
36 // | +-----------+--------------+ | |
37 // +--------------------------> | 4 | 0x5678 .... | | |
38 // +-----------+--------------+ | |
39 // (A hash-table underneath) | |
40 // | |
41 // Final note: the memory for both 1) and 2) entries is carved out from two | |
42 // static pools (i.e. stack_traces and vmas). The pools are treated as | |
43 // an sbrk essentially, and are kept compact by reusing freed elements (hence | |
44 // having a freelist for each of them). | |
45 // | |
46 // All the internal (static) functions here assume that the |lock| is held. | |
47 | |
48 #include <assert.h> | |
49 #include <string.h> | |
50 | |
51 // Platform-dependent mutex boilerplate. | |
52 #if defined(__linux__) || defined(__ANDROID__) | |
53 #include <pthread.h> | |
54 #define DEFINE_MUTEX(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER | |
55 #define LOCK_MUTEX(x) pthread_mutex_lock(&x) | |
56 #define UNLOCK_MUTEX(x) pthread_mutex_unlock(&x) | |
57 #else | |
58 #error OS not supported. | |
59 #endif | |
60 | |
61 #include "tools/android/heap_profiler/heap_profiler.h" | |
62 | |
63 | |
// Global lock protecting all the mutable state below. The public entry points
// take it; every static helper assumes it is already held by the caller.
static DEFINE_MUTEX(lock);

// |stats| contains the global tracking metadata and is the entry point which
// is read by the heap_dump tool.
static HeapStats* stats;
69 | |
// +---------------------------------------------------------------------------+
// + Stack traces hash-table                                                   +
// +---------------------------------------------------------------------------+
#define ST_ENTRIES_MAX (64 * 1024)
#define ST_HASHTABLE_BUCKETS (64 * 1024) /* Must be a power of 2. */

static StacktraceEntry stack_traces[ST_ENTRIES_MAX];  // Static entry pool.
static StacktraceEntry* stack_traces_freelist;        // Recycled pool entries.
static StacktraceEntry* stack_traces_ht[ST_HASHTABLE_BUCKETS];  // HT buckets.
79 | |
80 // Looks up a stack trace from the stack frames. Creates a new one if necessary. | |
81 static StacktraceEntry* record_stacktrace(uintptr_t* frames, uint32_t depth) { | |
82 if (depth == 0) | |
83 return NULL; | |
84 | |
85 if (depth > HEAP_PROFILER_MAX_DEPTH) | |
86 depth = HEAP_PROFILER_MAX_DEPTH; | |
87 | |
88 uint32_t i; | |
89 uintptr_t hash = 0; | |
90 for (i = 0; i < depth; ++i) | |
91 hash = (hash << 1) ^ (frames[i]); | |
92 const uint32_t slot = hash & (ST_HASHTABLE_BUCKETS - 1); | |
93 StacktraceEntry* st = stack_traces_ht[slot]; | |
94 | |
95 // Look for an existing entry in the hash-table. | |
96 const size_t frames_length = depth * sizeof(uintptr_t); | |
97 while (st != NULL && st->hash != hash && | |
98 memcmp(frames, st->frames, frames_length) != 0) { | |
99 st = st->next; | |
100 } | |
101 | |
102 // Is none is found, create a new one from the stack_traces array and add it | |
pasko
2014/06/23 20:09:23
None is found
Primiano Tucci (use gerrit)
2014/06/24 08:43:32
Actually that was an "If"
| |
103 // to the hash-table. | |
104 if (st == NULL) { | |
105 // Get a free element either from the freelist or from the pool. | |
106 if (stack_traces_freelist != NULL) { | |
107 st = stack_traces_freelist; | |
108 stack_traces_freelist = stack_traces_freelist->next; | |
109 } else if (stats->max_stack_traces < ST_ENTRIES_MAX) { | |
110 st = &stack_traces[stats->max_stack_traces]; | |
111 ++stats->max_stack_traces; | |
112 } else { | |
113 return NULL; | |
114 } | |
115 | |
116 memset(st, 0, sizeof(*st)); | |
117 memcpy(st->frames, frames, frames_length); | |
118 st->hash = hash; | |
119 st->next = stack_traces_ht[slot]; | |
120 stack_traces_ht[slot] = st; | |
121 ++stats->num_stack_traces; | |
122 } | |
123 | |
124 return st; | |
125 } | |
126 | |
127 // Frees up a stack trace and appends it to the corresponding freelist. | |
128 static void free_stacktrace(StacktraceEntry* st) { | |
129 assert(st->alloc_bytes == 0); | |
130 const uint32_t slot = st->hash & (ST_HASHTABLE_BUCKETS - 1); | |
131 | |
132 // The expected load factor of the hash-table is very low. Frees should be | |
133 // pretty rare. Hence don't bother with a doubly linked list, might cost more. | |
134 StacktraceEntry** prev = &stack_traces_ht[slot]; | |
135 while (*prev != st) | |
136 prev = &((*prev)->next); | |
137 | |
138 // Remove from the hash-table bucket. | |
139 assert(*prev == st); | |
140 *prev = st->next; | |
141 | |
142 // Add to the freelist. | |
143 st->next = stack_traces_freelist; | |
144 stack_traces_freelist = st; | |
145 --stats->num_stack_traces; | |
146 } | |
147 | |
// +---------------------------------------------------------------------------+
// + VMAs RB-tree                                                              +
// +---------------------------------------------------------------------------+
#define VMA_ENTRIES_MAX (256 * 1024)

static VMA vmas[VMA_ENTRIES_MAX];  // Static entry pool (grown sbrk-style).
static VMA* vmas_freelist;         // Recycled entries, reused before the pool.
static RB_HEAD(HeapEntriesTree, VMA) vmas_tree = RB_INITIALIZER(&vmas_tree);
156 | |
157 // Comparator used by the RB-Tree (mind the overflow, avoid arith on addresses). | |
158 static int vmas_tree_cmp(VMA *e1, VMA *e2) { | |
159 if (e1->start < e2->start) | |
160 return -1; | |
161 if (e1->start > e2->start) | |
162 return 1; | |
163 return 0; | |
164 } | |
165 | |
166 RB_PROTOTYPE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp); | |
167 RB_GENERATE(HeapEntriesTree, VMA, rb_node, vmas_tree_cmp); | |
168 | |
169 // Allocates a new VMA and inserts it in the tree. | |
170 static VMA* insert_vma( | |
171 uintptr_t start, uintptr_t end, StacktraceEntry* st, uint32_t flags) { | |
172 VMA* vma = NULL; | |
173 | |
174 // First of all, get a free element either from the freelist or from the pool. | |
175 if (vmas_freelist != NULL) { | |
176 vma = vmas_freelist; | |
177 vmas_freelist = vma->next_free; | |
178 } else if (stats->max_allocs < VMA_ENTRIES_MAX) { | |
179 vma = &vmas[stats->max_allocs]; | |
180 ++stats->max_allocs; | |
181 } else { | |
182 return NULL; // OOM. | |
183 } | |
184 | |
185 vma->start = start; | |
186 vma->end = end; | |
187 vma->st = st; | |
188 vma->flags = flags; | |
189 vma->next_free = NULL; | |
190 RB_INSERT(HeapEntriesTree, &vmas_tree, vma); | |
191 ++stats->num_allocs; | |
192 return vma; | |
193 } | |
194 | |
// Deletes all the vmas in the range [addr, addr+size[ dealing with partial
// frees and hole punching. Note that in the general case this function might
// need to deal with very unfortunate cases, as below:
//
// VMA tree @ begin: [ VMA 1 ]----[ VMA 2 ]-------[ VMA 3 ][ VMA 4 ]---[ VMA 5 ]
// Deletion range:                    [xxxxxxxxxxxxxxxxxxxx]
// VMA tree @ end:   [ VMA 1 ]----[VMA2]----------------------[VMA4]---[ VMA 5 ]
// VMA3 has to be deleted and VMA 2,4 shrunk.
//
// |size| == 0 is a special case: it deletes only the single VMA starting
// exactly at |addr| (if any), matching the semantics of free(ptr).
// Returns the bitwise OR of the |flags| of all the VMAs touched (deleted or
// shrunk), so the caller can report what kind of memory was released.
static uint32_t delete_vmas_in_range(void* addr, size_t size) {
  uintptr_t del_start = (uintptr_t) addr;
  uintptr_t del_end = del_start + size - 1;  // Wraps when size == 0 (special-cased below).
  uint32_t flags = 0;

  VMA* vma = NULL;
  VMA* next_vma = RB_ROOT(&vmas_tree);

  // Lookup the first (by address) relevant VMA to initiate the deletion walk.
  // At the end of the loop next_vma is either:
  // - the closest VMA starting before (or exactly at) the start of the deletion
  //   range (i.e. addr == del_start).
  // - the first VMA inside the deletion range.
  // - the first VMA after the deletion range iff the range was already empty
  //   (in this case the next loop will just bail out doing nothing).
  // - NULL: iff the entire tree is empty (as above).
  while (next_vma != NULL) {
    vma = next_vma;
    if (vma->start > del_start) {
      next_vma = RB_LEFT(vma, rb_node);
    } else if (vma->end < del_start) {
      next_vma = RB_RIGHT(vma, rb_node);
    } else {  // vma->start <= del_start && vma->end >= del_start
      break;
    }
  }

  // Now scan the VMAs linearly deleting chunks (or eventually the whole VMAs)
  // until passing the end of the deletion region.
  next_vma = vma;
  while (next_vma != NULL) {
    vma = next_vma;
    // Fetch the successor now: this iteration may remove |vma| from the tree.
    next_vma = RB_NEXT(HeapEntriesTree, &vmas_tree, vma);

    if (size != 0) {
      // In the general case we stop once past the end of the deletion range.
      if (vma->start > del_end)
        break;

      // This deals with the case of the first VMA lying before the range.
      if (vma->end < del_start)
        continue;
    } else {
      // size == 0 is a special case. It means deleting only the vma which
      // starts exactly at |del_start| if any (for dealing with free(ptr)).
      if (vma->start > del_start)
        break;
      if (vma->start < del_start)
        continue;
      del_end = vma->end;  // Extend the range to swallow that whole VMA.
    }

    // Reached this point the VMA must overlap (partially or completely) with
    // the deletion range.
    assert(!(vma->start > del_end || vma->end < del_start));

    StacktraceEntry* st = vma->st;
    flags |= vma->flags;
    uintptr_t freed_bytes = 0;  // Bytes freed in this cycle.

    if (del_start <= vma->start) {
      if (del_end >= vma->end) {
        // Complete overlap. Delete full VMA. Note: the range might still
        // overlap with the next vmas.
        // Begin:       ------[vma.start   vma.end]-[next vma]
        // Del range:      [xxxxxxxxxxxxxxxxxxxxxxxxxxxx]
        // Result:      -----------------------------[next vma]
        // Note: at the next iteration we'll deal with the next vma.
        freed_bytes = vma->end - vma->start + 1;
        RB_REMOVE(HeapEntriesTree, &vmas_tree, vma);

        // Clean-up, so heap_dump can tell this is a free entry and skip it.
        vma->start = vma->end = 0;
        vma->st = NULL;

        // Put in the freelist.
        vma->next_free = vmas_freelist;
        vmas_freelist = vma;
        --stats->num_allocs;
      } else {
        // Partial overlap at beginning. Cut first part and shrink the vma.
        // Begin:       ------[vma.start   vma.end]-[next vma]
        // Del range:      [xxxxxx]
        // Result:      ------------[start vma.end]-[next vma]
        freed_bytes = del_end - vma->start + 1;
        vma->start = del_end + 1;
        // No need to update the tree even if we changed the key. The keys are
        // still monotonic (because the ranges are guaranteed to not overlap).
      }
    } else {
      if (del_end >= vma->end) {
        // Partial overlap at end. Cut last part and shrink the vma left.
        // Begin:       ------[vma.start   vma.end]-[next vma]
        // Del range:              [xxxxxx]
        // Result:      ------[vma.start vma.end]-[next vma]
        // Note: at the next iteration we'll deal with the next vma.
        freed_bytes = vma->end - del_start + 1;
        vma->end = del_start - 1;
      } else {
        // Hole punching. Requires creating an extra vma.
        // Begin:       ------[vma.start   vma.end]-[next vma]
        // Del range:            [xxx]
        // Result:      ------[ vma 1 ]-----[ vma 2 ]-[next vma]
        freed_bytes = del_end - del_start + 1;
        const uintptr_t old_end = vma->end;
        vma->end = del_start - 1;
        // NOTE(review): insert_vma can return NULL on OOM, in which case the
        // tail [del_end+1, old_end] silently stops being tracked (best-effort
        // by design; the profiler never aborts the traced process).
        insert_vma(del_end + 1, old_end, st, vma->flags);
      }
    }
    // Now update the StackTraceEntry the VMA was pointing to, eventually
    // freeing it up.
    assert(st->alloc_bytes >= freed_bytes);
    st->alloc_bytes -= freed_bytes;
    if (st->alloc_bytes == 0)
      free_stacktrace(st);
    stats->total_alloc_bytes -= freed_bytes;
  }
  return flags;
}
322 | |
323 // +---------------------------------------------------------------------------+ | |
324 // + Library entry points (refer to heap_profiler.h for API doc). + | |
325 // +---------------------------------------------------------------------------+ | |
326 void heap_profiler_free(void* addr, size_t size, uint32_t* old_flags) { | |
327 assert(size == 0 || ((uintptr_t) addr + (size - 1)) >= (uintptr_t) addr); | |
328 | |
329 LOCK_MUTEX(lock); | |
330 uint32_t flags = delete_vmas_in_range(addr, size); | |
331 UNLOCK_MUTEX(lock); | |
332 | |
333 if (old_flags != NULL) | |
334 *old_flags = flags; | |
335 } | |
336 | |
337 void heap_profiler_alloc(void* addr, size_t size, uintptr_t* frames, | |
338 uint32_t depth, uint32_t flags) { | |
339 if (depth > HEAP_PROFILER_MAX_DEPTH) | |
340 depth = HEAP_PROFILER_MAX_DEPTH; | |
341 | |
342 if (size == 0) // Apps calling malloc(0), sometimes it happens. | |
343 return; | |
344 | |
345 const uintptr_t start = (uintptr_t) addr; | |
346 const uintptr_t end = start + (size - 1); | |
347 assert(start <= end); | |
348 | |
349 LOCK_MUTEX(lock); | |
350 | |
351 delete_vmas_in_range(addr, size); | |
pasko
2014/06/23 20:09:23
ugh, having to remove ranges is counter-intuitive
Primiano Tucci (use gerrit)
2014/06/24 08:43:33
This is to be paranoid in the case we miss some fr
pasko
2014/06/24 13:21:45
I would argue that it can be actionable. If the me
Primiano Tucci (use gerrit)
2014/06/24 15:10:56
Really? You stop using a device just because you k
| |
352 | |
353 StacktraceEntry* st = record_stacktrace(frames, depth); | |
354 if (st != NULL) { | |
355 VMA* vma = insert_vma(start, end, st, flags); | |
356 if (vma != NULL) { | |
357 st->alloc_bytes += size; | |
358 stats->total_alloc_bytes += size; | |
359 } | |
360 } | |
361 | |
362 UNLOCK_MUTEX(lock); | |
363 } | |
364 | |
365 void heap_profiler_init(HeapStats* heap_stats) { | |
366 LOCK_MUTEX(lock); | |
367 | |
368 assert(stats == NULL); | |
369 stats = heap_stats; | |
370 memset(stats, 0, sizeof(HeapStats)); | |
pasko
2014/06/23 20:09:23
isn't this memory mapped from /dev/zero?
Primiano Tucci (use gerrit)
2014/06/24 08:43:33
This is essentially for the tests, which init/clea
| |
371 stats->magic_start = HEAP_PROFILER_MAGIC_MARKER; | |
372 stats->allocs = &vmas[0]; | |
373 stats->stack_traces = &stack_traces[0]; | |
374 | |
375 UNLOCK_MUTEX(lock); | |
376 } | |
377 | |
378 void heap_profiler_cleanup(void) { | |
379 LOCK_MUTEX(lock); | |
380 | |
381 assert(stats != NULL); | |
382 memset(stack_traces, 0, sizeof(StacktraceEntry) * stats->max_stack_traces); | |
383 memset(stack_traces_ht, 0, sizeof(stack_traces_ht)); | |
384 stack_traces_freelist = NULL; | |
385 | |
386 memset(vmas, 0, sizeof(VMA) * stats->max_allocs); | |
387 vmas_freelist = NULL; | |
388 RB_INIT(&vmas_tree); | |
389 | |
390 stats = NULL; | |
391 | |
392 UNLOCK_MUTEX(lock); | |
393 } | |
OLD | NEW |