OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """The deep heap profiler script for Chrome.""" | |
7 | |
8 from datetime import datetime | |
9 import json | |
10 import os | |
11 import re | |
12 import subprocess | |
13 import sys | |
14 import tempfile | |
15 | |
# Column indices into a stacktrace line of a heap profile dump:
#   "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id> ..."
# words[BUCKET_ID - 1] is the literal '@' separator.
BUCKET_ID = 5
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
# Compiled empty pattern (matches at any position).
# NOTE(review): not referenced anywhere in this file; presumably kept for
# external users -- TODO confirm before removing.
NULL_REGEX = re.compile('')
# Path to the pprof script bundled with the in-tree tcmalloc checkout.
PPROF_PATH = os.path.join(os.path.dirname(__file__),
                          os.pardir,
                          os.pardir,
                          'third_party',
                          'tcmalloc',
                          'chromium',
                          'src',
                          'pprof')

# Heap Profile Dump versions

# DUMP_DEEP_1 DOES NOT distinguish mmap regions and malloc chunks.
# Their stacktraces DO contain mmap* or tc-* at their tops.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_1 = 'DUMP_DEEP_1'

# DUMP_DEEP_2 DOES distinguish mmap regions and malloc chunks.
# Their stacktraces still DO contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_1.
DUMP_DEEP_2 = 'DUMP_DEEP_2'

# DUMP_DEEP_3 DOES distinguish mmap regions and malloc chunks.
# Their stacktraces DO NOT contain mmap* or tc-*.
# They should be processed by POLICY_DEEP_2.
DUMP_DEEP_3 = 'DUMP_DEEP_3'

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished w/ mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished w/ the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# TODO(dmikurube): Avoid global variables.
# Maps a (hex) address string to its resolved symbol name (see
# update_symbols).
address_symbol_dict = {}
# Every address seen in any bucket's stacktrace; the set of symbols that
# need resolving.
appeared_addresses = set()
# Ordered component names collected from the policy file (see parse_policy).
components = []
62 | |
63 | |
class Policy(object):
  """One policy rule: a component name plus the stacktrace pattern it claims.

  Attributes:
    name: A string naming the component this rule assigns.
    mmap: True if the rule applies to mmap regions, False for malloc chunks.
    condition: A compiled regex that must cover an entire symbolized
        stacktrace for the rule to match.
  """

  def __init__(self, name, mmap, pattern):
    # Anchor with \Z so that the pattern has to match the whole stacktrace,
    # not just a prefix of it.
    anchored = pattern + r'\Z'
    self.condition = re.compile(anchored)
    self.mmap = mmap
    self.name = name
70 | |
71 | |
def get_component(policy_list, bucket, mmap):
  """Returns a component name which a given bucket belongs to.

  The result is memoized in bucket.component, so each bucket is matched
  against the policy list at most once.

  Args:
    policy_list: A list containing Policy objects. (Parsed policy data by
        parse_policy.)
    bucket: A Bucket object to be searched for.
    mmap: True if searching for a mmap region.

  Returns:
    A string representing a component name.

  Raises:
    AssertionError: If no policy in policy_list matches the bucket.
  """
  if not bucket:
    return 'no-bucket'
  if bucket.component:
    # Already classified on a previous call; return the memoized name.
    return bucket.component

  # Join the symbolized frames into one space-separated string; policy
  # patterns match against this whole string.
  stacktrace = ''.join(
      address_symbol_dict[a] + ' ' for a in bucket.stacktrace).strip()

  for policy in policy_list:
    if mmap == policy.mmap and policy.condition.match(stacktrace):
      bucket.component = policy.name
      return policy.name

  # A policy file is expected to end with catch-all rules, so falling
  # through means the policy file is broken.  Raise explicitly (instead of
  # 'assert False') so the check also survives 'python -O' instead of
  # silently returning None.
  raise AssertionError('no policy matches stacktrace: ' + stacktrace)
98 | |
99 | |
class Bucket(object):
  """One unique allocation stacktrace together with its cached component.

  Attributes:
    stacktrace: A list of address strings forming the stacktrace.
    component: The component name assigned by get_component(); the empty
        string until the first policy match.
  """

  def __init__(self, stacktrace):
    # No component is known yet; get_component() fills this in lazily.
    self.component = ''
    self.stacktrace = stacktrace
105 | |
106 | |
class Log(object):

  """A class representing one dumped log data."""
  def __init__(self, log_path, buckets):
    # Reads the whole dump file into memory and parses it immediately
    # (global stats first, then the stacktrace sections).
    self.log_path = log_path
    with open(self.log_path, mode='r') as log_f:
      self.log_lines = log_f.readlines()
    self.log_version = ''
    sys.stderr.write('parsing a log file:%s\n' % log_path)
    self.mmap_stacktrace_lines = []
    self.malloc_stacktrace_lines = []
    self.counters = {}
    # The dump time is approximated by the file's modification time.
    self.log_time = os.stat(self.log_path).st_mtime
    self.parse_log(buckets)

  @staticmethod
  def dump_stacktrace_lines(stacktrace_lines, buckets):
    """Prints a given stacktrace.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if not bucket:
        continue
      # Emit the numeric columns (virtual, committed, alloc count, free
      # count); words[BUCKET_ID - 1] is the '@' separator and is skipped.
      for i in range(0, BUCKET_ID - 1):
        sys.stdout.write(words[i] + ' ')
      # Print the symbol when resolved, or fall back to the raw address.
      for address in bucket.stacktrace:
        sys.stdout.write((address_symbol_dict.get(address) or address) + ' ')
      sys.stdout.write('\n')

  def dump_stacktrace(self, buckets):
    """Prints stacktraces contained in the log.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
    """
    self.dump_stacktrace_lines(self.mmap_stacktrace_lines, buckets)
    self.dump_stacktrace_lines(self.malloc_stacktrace_lines, buckets)

  @staticmethod
  def accumulate_size_for_pprof(stacktrace_lines, policy_list, buckets,
                                component_name, mmap):
    """Accumulates size of committed chunks and the number of allocated chunks.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.

    Returns:
      Two integers which are the accumulated size of committed regions and the
      number of allocated chunks, respectively.
    """
    com_committed = 0
    com_allocs = 0
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      # Skip unknown buckets and, when filtering, buckets belonging to a
      # different component.
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      com_committed += int(words[COMMITTED])
      # Live chunk count = allocations minus frees.
      com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

    return com_committed, com_allocs

  @staticmethod
  def dump_stacktrace_lines_for_pprof(stacktrace_lines, policy_list,
                                      buckets, component_name, mmap):
    """Prints information of stacktrace lines for pprof.

    Args:
      stacktrace_lines: A list of strings which are valid as stacktraces.
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      mmap: True if searching for a mmap region.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      if (not bucket or
          (component_name and
           component_name != get_component(policy_list, bucket, mmap))):
        continue

      # pprof heap-profile record: "<count>: <bytes> [<count>: <bytes>] @"
      # followed by the raw stack addresses.
      sys.stdout.write('%6d: %8s [%6d: %8s] @' % (
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED],
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED]))
      for address in bucket.stacktrace:
        sys.stdout.write(' ' + address)
      sys.stdout.write('\n')

  def dump_for_pprof(self, policy_list, buckets, mapping_lines, component_name):
    """Converts the log file so it can be processed by pprof.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      mapping_lines: A list of strings containing /proc/.../maps.
      component_name: A name of component for filtering.
    """
    sys.stdout.write('heap profile: ')
    # Totals over both the mmap and malloc sections form the header line.
    com_committed, com_allocs = self.accumulate_size_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    add_committed, add_allocs = self.accumulate_size_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)
    com_committed += add_committed
    com_allocs += add_allocs

    sys.stdout.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
        com_allocs, com_committed, com_allocs, com_committed))

    self.dump_stacktrace_lines_for_pprof(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        True)
    self.dump_stacktrace_lines_for_pprof(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        False)

    # pprof needs the process mappings to symbolize the addresses.
    sys.stdout.write('MAPPED_LIBRARIES:\n')
    for l in mapping_lines:
      sys.stdout.write(l)

  @staticmethod
  def check_stacktrace_line(stacktrace_line, buckets):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Side effect: records the addresses of a valid line's bucket into the
    global appeared_addresses set for later symbol resolution.

    Args:
      stacktrace_line: A string to be checked.
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.

    Returns:
      True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    # The '@' separator must sit right before the bucket id.
    if words[BUCKET_ID - 1] != '@':
      return False
    bucket = buckets.get(int(words[BUCKET_ID]))
    if bucket:
      for address in bucket.stacktrace:
        appeared_addresses.add(address)
    return True

  @staticmethod
  def skip_lines_while(line_number, max_line_number, skipping_condition):
    """Increments line_number until skipping_condition(line_number) is false.

    NOTE(review): on overrun it reports the error and returns the current
    (out-of-range) line number instead of raising -- callers index with the
    result, so a truncated dump may still fail later.
    """
    while skipping_condition(line_number):
      line_number += 1
      if line_number >= max_line_number:
        sys.stderr.write('invalid heap profile dump.')
        return line_number
    return line_number

  def parse_stacktraces_while_valid(self, buckets, log_lines, ln):
    """Parses stacktrace lines while the lines are valid.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      log_lines: A list of lines to be parsed.
      ln: An integer representing the starting line number in log_lines.

    Returns:
      A pair of a list of valid lines and an integer representing the last
      line number in log_lines.
    """
    # Skip the section header: stacktrace lines begin with a number.
    ln = self.skip_lines_while(
        ln, len(log_lines), lambda n: not log_lines[n].split()[0].isdigit())
    stacktrace_lines_start = ln
    # Consume consecutive valid stacktrace lines (this also records their
    # addresses via check_stacktrace_line).
    ln = self.skip_lines_while(
        ln, len(log_lines),
        lambda n: self.check_stacktrace_line(log_lines[n], buckets))
    return (log_lines[stacktrace_lines_start:ln], ln)

  def parse_stacktraces(self, buckets):
    """Parses lines in self.log_lines as stacktrace.

    Valid stacktrace lines are stored into self.mmap_stacktrace_lines and
    self.malloc_stacktrace_lines.

    Args:
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.

    Returns:
      A string representing a version of the stacktrace dump. '' for invalid
      dump.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    ln = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: not self.log_lines[n].startswith(headers))

    # Identify a version.
    if self.log_lines[ln].startswith('heap profile: '):
      # Slice off the 'heap profile: ' prefix; strip() removes the leftover
      # leading space and the trailing newline.
      version = self.log_lines[ln][13:].strip()
      if version == DUMP_DEEP_2 or version == DUMP_DEEP_3:
        ln = self.skip_lines_while(
            ln, len(self.log_lines),
            lambda n: self.log_lines[n] != 'MMAP_STACKTRACES:\n')
      else:
        sys.stderr.write(' invalid heap profile dump version:%s\n' % version)
        return ''
    elif self.log_lines[ln] == 'STACKTRACES:\n':
      # Headerless dumps with a single section are DUMP_DEEP_1.
      version = DUMP_DEEP_1
    elif self.log_lines[ln] == 'MMAP_STACKTRACES:\n':
      # Headerless dumps with separate mmap/malloc sections are DUMP_DEEP_2.
      version = DUMP_DEEP_2

    if version == DUMP_DEEP_3:
      # Two sections, kept separate: mmap first, then malloc.
      sys.stderr.write(' heap profile dump version: %s\n' % version)
      (self.mmap_stacktrace_lines, ln) = self.parse_stacktraces_while_valid(
          buckets, self.log_lines, ln)
      ln = self.skip_lines_while(
          ln, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, ln) = self.parse_stacktraces_while_valid(
          buckets, self.log_lines, ln)
      return version

    elif version == DUMP_DEEP_2:
      # Two sections in the file, but merged into malloc_stacktrace_lines
      # because DUMP_DEEP_2 stacktraces still carry mmap*/tc-* frames that
      # POLICY_DEEP_1 patterns distinguish by themselves.
      sys.stderr.write(' heap profile dump version: %s\n' % version)
      (self.mmap_stacktrace_lines, ln) = self.parse_stacktraces_while_valid(
          buckets, self.log_lines, ln)
      ln = self.skip_lines_while(
          ln, len(self.log_lines),
          lambda n: self.log_lines[n] != 'MALLOC_STACKTRACES:\n')
      (self.malloc_stacktrace_lines, ln) = self.parse_stacktraces_while_valid(
          buckets, self.log_lines, ln)
      self.malloc_stacktrace_lines.extend(self.mmap_stacktrace_lines)
      self.mmap_stacktrace_lines = []
      return version

    elif version == DUMP_DEEP_1:
      # One combined section.
      sys.stderr.write(' heap profile dump version: %s\n' % version)
      (self.malloc_stacktrace_lines, ln) = self.parse_stacktraces_while_valid(
          buckets, self.log_lines, ln)
      return version

    else:
      sys.stderr.write(' invalid heap profile dump version:%s\n' % version)
      return ''

  def parse_global_stats(self):
    """Parses lines in self.log_lines as global stats."""
    ln = self.skip_lines_while(
        0, len(self.log_lines),
        lambda n: self.log_lines[n] != 'GLOBAL_STATS:\n')

    # Each stat line is "<prefix> ... <virtual> <committed>"; only the last
    # two columns are recorded.
    for prefix in ['total', 'file', 'anonymous', 'other', 'mmap', 'tcmalloc']:
      ln = self.skip_lines_while(
          ln, len(self.log_lines),
          lambda n: self.log_lines[n].split()[0] != prefix)
      words = self.log_lines[ln].split()
      self.counters[prefix + '_virtual'] = int(words[-2])
      self.counters[prefix + '_committed'] = int(words[-1])

  def parse_log(self, buckets):
    """Parses the whole dump: global stats, then versioned stacktraces."""
    self.parse_global_stats()
    self.log_version = self.parse_stacktraces(buckets)

  @staticmethod
  def accumulate_size_for_policy(stacktrace_lines,
                                 policy_list, buckets, sizes, mmap):
    """Adds each line's committed size to its component's entry in sizes.

    Also maintains the tc-total-log / mmap-total-log / other-total-log
    aggregate entries, keyed by the matched component's name prefix.
    """
    for l in stacktrace_lines:
      words = l.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      sizes[component_match] += int(words[COMMITTED])

      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match.startswith('mmap-'):
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

  def apply_policy(self, policy_list, buckets, first_log_time):
    """Aggregates the total memory size of each component.

    Iterate through all stacktraces and attribute them to one of the components
    based on the policy. It is important to apply policy in right order.

    NOTE(review): assumes the policy file declares the derived component
    names used below (e.g. 'tc-total-log', 'mmap-tcmalloc'); a KeyError
    here means the policy file is missing one of them -- confirm against
    the policy files in use.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      first_log_time: An integer representing time when the first log is
          dumped.

    Returns:
      A dict mapping components and their corresponding sizes.
    """

    sys.stderr.write('apply policy:%s\n' % (self.log_path))
    sizes = dict((c, 0) for c in components)

    self.accumulate_size_for_policy(self.mmap_stacktrace_lines,
                                    policy_list, buckets, sizes, True)
    self.accumulate_size_for_policy(self.malloc_stacktrace_lines,
                                    policy_list, buckets, sizes, False)

    # Derive "not covered by the log" remainders from the global counters.
    sizes['mmap-no-log'] = self.counters['mmap_committed'] - sizes[
        'mmap-total-log']
    sizes['mmap-total-record'] = self.counters['mmap_committed']
    sizes['mmap-total-record-vm'] = self.counters['mmap_virtual']

    sizes['tc-no-log'] = self.counters['tcmalloc_committed'] - sizes[
        'tc-total-log']
    sizes['tc-total-record'] = self.counters['tcmalloc_committed']
    sizes['tc-unused'] = sizes['mmap-tcmalloc'] - self.counters[
        'tcmalloc_committed']
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # Copy the global counters into their corresponding components, but
    # only for components the policy file actually declares.
    for key, value in { 'total': 'total_committed',
                        'filemapped': 'file_committed',
                        'anonymous': 'anonymous_committed',
                        'other': 'other_committed',
                        'total-vm': 'total_virtual',
                        'filemapped-vm': 'file_virtual',
                        'anonymous-vm': 'anonymous_virtual',
                        'other-vm': 'other_virtual' }.items():
      if key in sizes:
        sizes[key] = self.counters[value]

    if 'unknown' in sizes:
      sizes['unknown'] = self.counters['total_committed'] - self.counters[
          'mmap_committed']
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = self.counters[
          'total_committed'] - sizes['mmap-profiler']
    # Optional time-series columns, relative to the first dump.
    if 'hour' in sizes:
      sizes['hour'] = (self.log_time - first_log_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (self.log_time - first_log_time) / 60.0
    if 'second' in sizes:
      sizes['second'] = self.log_time - first_log_time

    return sizes

  @staticmethod
  def accumulate_size_for_expand(stacktrace_lines, policy_list, buckets,
                                 component_name, depth, sizes, mmap):
    """Sums committed bytes per truncated-symbolized-stacktrace key.

    Only lines whose component matches component_name contribute.  The
    stacktrace key skips frame 0 and keeps at most 'depth' frames.
    """
    for line in stacktrace_lines:
      words = line.split()
      bucket = buckets.get(int(words[BUCKET_ID]))
      component_match = get_component(policy_list, bucket, mmap)
      if component_match == component_name:
        stacktrace_sequence = ''
        # Frames 1..depth (frame 0 is skipped by the slice).
        for address in bucket.stacktrace[1 : min(len(bucket.stacktrace),
                                                 1 + depth)]:
          stacktrace_sequence += address_symbol_dict[address] + ' '
        if not stacktrace_sequence in sizes:
          sizes[stacktrace_sequence] = 0
        sizes[stacktrace_sequence] += int(words[COMMITTED])

  def expand(self, policy_list, buckets, component_name, depth):
    """Prints all stacktraces in a given component of given depth.

    Args:
      policy_list: A list containing Policy objects. (Parsed policy data by
          parse_policy.)
      buckets: A dict mapping bucket ids and their corresponding Bucket
          objects.
      component_name: A name of component for filtering.
      depth: An integer representing depth to be printed.
    """
    sizes = {}

    self.accumulate_size_for_expand(
        self.mmap_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, True)
    self.accumulate_size_for_expand(
        self.malloc_stacktrace_lines, policy_list, buckets, component_name,
        depth, sizes, False)

    # Largest consumers first.  (dict.iteritems: this script targets
    # Python 2.)
    sorted_sizes_list = sorted(
        sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
    total = 0
    for size_pair in sorted_sizes_list:
      sys.stdout.write('%10d %s\n' % (size_pair[1], size_pair[0]))
      total += size_pair[1]
    sys.stderr.write('total: %d\n' % (total))
518 | |
519 | |
def update_symbols(symbol_path, mapping_lines, chrome_path):
  """Updates address/symbol mapping on memory and in a .symbol cache file.

  It reads cached address/symbol mapping from a .symbol file if it exists.
  Then, it resolves unresolved addresses from a Chrome binary with pprof.
  Both mappings on memory and in a .symbol cache file are updated.

  Symbol files are formatted as follows:
    <Address> <Symbol>
    <Address> <Symbol>
    <Address> <Symbol>
    ...

  Args:
    symbol_path: A string representing a path for a .symbol file.
    mapping_lines: A list of strings containing /proc/.../maps.
    chrome_path: A string representing a path for a Chrome binary.
  """
  # 'a+' creates the cache file if missing, and allows both reading the
  # cached entries and appending newly resolved ones.
  with open(symbol_path, mode='a+') as symbol_f:
    symbol_lines = symbol_f.readlines()
    if symbol_lines:
      for line in symbol_lines:
        items = line.split(None, 1)
        address_symbol_dict[items[0]] = items[1].rstrip()

    unresolved_addresses = sorted(
        a for a in appeared_addresses if a not in address_symbol_dict)

    if unresolved_addresses:
      # pprof --symbols reads the maps section followed by one address per
      # line, and prints one symbol per line in the same order.
      with tempfile.NamedTemporaryFile(
          suffix='maps', prefix="dmprof", mode='w+') as pprof_in:
        with tempfile.NamedTemporaryFile(
            suffix='symbols', prefix="dmprof", mode='w+') as pprof_out:
          for line in mapping_lines:
            pprof_in.write(line)

          for address in unresolved_addresses:
            pprof_in.write(address + '\n')

          pprof_in.seek(0)

          # Pass an argument list without a shell so that paths containing
          # spaces or shell metacharacters can neither break nor inject
          # into the command line.
          p = subprocess.Popen([PPROF_PATH, '--symbols', chrome_path],
                               stdin=pprof_in, stdout=pprof_out)
          p.wait()

          pprof_out.seek(0)
          symbols = pprof_out.readlines()
          # Seek to the end before appending new cache entries.
          symbol_f.seek(0, 2)
          for address, symbol in zip(unresolved_addresses, symbols):
            stripped_symbol = symbol.strip()
            address_symbol_dict[address] = stripped_symbol
            symbol_f.write('%s %s\n' % (address, stripped_symbol))
573 | |
574 | |
def parse_policy(policy_path):
  """Parses policy file.

  A policy file contains component's names and their
  stacktrace pattern written in regular expression.
  Those patterns are matched against each symbols of
  each stacktraces in the order written in the policy file

  Args:
    policy_path: A path for a policy file.
  Returns:
    A list containing component's name and its regex object
  """
  with open(policy_path, mode='r') as policy_f:
    policy_lines = policy_f.readlines()

  # An explicit version header is optional; POLICY_DEEP_1 is the default.
  policy_version = POLICY_DEEP_1
  if policy_lines[0].startswith('heap profile policy: '):
    # The version name follows the 21-character prefix.
    policy_version = policy_lines[0][21:].strip()
    policy_lines.pop(0)
  policy_list = []

  if policy_version == POLICY_DEEP_2 or policy_version == POLICY_DEEP_1:
    sys.stderr.write(' heap profile policy version: %s\n' % policy_version)
    for line in policy_lines:
      # Skip comment lines and blank lines.  (A blank line previously
      # crashed the unpacking/indexing below.)
      if not line.strip() or line.startswith('#'):
        continue

      if policy_version == POLICY_DEEP_2:
        # Line format: <component> <allocation_type> <pattern>
        (name, allocation_type, pattern) = line.strip().split(None, 2)
        mmap = (allocation_type == 'mmap')
      elif policy_version == POLICY_DEEP_1:
        # Line format: <component> <pattern>  (no allocation_type column;
        # mmap regions are distinguished by the pattern itself).
        name = line.split()[0]
        pattern = line[len(name):].strip()
        mmap = False

      # 'default' is a sentinel meaning "no pattern": it is not compiled
      # into a rule, but its component name is still registered.
      if pattern != 'default':
        policy_list.append(Policy(name, mmap, pattern))
      if name not in components:
        components.append(name)

  else:
    sys.stderr.write(' invalid heap profile policy version: %s\n' % (
        policy_version))

  return policy_list
623 | |
624 | |
def main():
  """Drives one dmprof run: parses arguments, loads inputs, dispatches.

  Exits with status 1 (after printing usage) on invalid arguments.
  """
  if (len(sys.argv) < 4) or (not (sys.argv[1] in ['--csv',
                                                  '--json',
                                                  '--expand',
                                                  '--list',
                                                  '--stacktrace',
                                                  '--pprof'])):
    sys.stderr.write("""Usage:
%s [options] <chrome-binary> <policy> <profile> [component-name] [depth]

Options:
   --csv         Output result in csv format
   --json        Output result in json format
   --stacktrace  Convert raw address to symbol names
   --list        Lists components and their sizes
   --expand      Show all stacktraces in the specified component
                 of given depth with their sizes
   --pprof       Format the profile file so it can be processed
                 by pprof

Examples:
  dmprof --csv Debug/chrome dmpolicy hprof.12345.0001.heap > result.csv
  dmprof --json Debug/chrome dmpolicy hprof.12345.0001.heap > result.json
  dmprof --list Debug/chrome dmpolicy hprof.12345.0012.heap
  dmprof --expand Debug/chrome dmpolicy hprof.12345.0012.heap tc-webkit 4
  dmprof --pprof Debug/chrome dmpolicy hprof.12345.0012.heap > for_pprof.txt
""" % (sys.argv[0]))
    sys.exit(1)

  action = sys.argv[1]
  chrome_path = sys.argv[2]
  policy_path = sys.argv[3]
  log_path = sys.argv[4]

  sys.stderr.write('parsing a policy file\n')
  policy_list = parse_policy(policy_path)

  # Dump paths look like <prefix>.NNNN.heap; strip the suffix to derive
  # the sibling .symbols / .maps / .buckets file paths.  (Raw string so
  # the regex escapes are unambiguous.)
  p = re.compile(r'\.[0-9][0-9][0-9][0-9]\.heap')
  prefix = p.sub('', log_path)
  symbol_path = prefix + '.symbols'

  sys.stderr.write('parsing the maps file\n')
  maps_path = prefix + '.maps'
  with open(maps_path, 'r') as maps_f:
    maps_lines = maps_f.readlines()

  # Reading buckets
  sys.stderr.write('parsing the bucket file\n')
  buckets = {}
  n = 0
  while True:
    buckets_path = '%s.%04d.buckets' % (prefix, n)
    if not os.path.exists(buckets_path):
      # Missing indices are tolerated up to 10; past that, the first
      # missing file ends the scan.
      if n > 10:
        break
      n += 1
      continue
    sys.stderr.write('reading buckets from %s\n' % (buckets_path))
    with open(buckets_path, 'r') as buckets_f:
      # Bucket file line format: <bucket_id> <address> <address> ...
      for l in buckets_f:
        words = l.split()
        buckets[int(words[0])] = Bucket(words[1:])
    n += 1

  # Report how many buckets were actually read.  (The old code printed a
  # counter that was initialized to 0 and never incremented.)
  sys.stderr.write('the number buckets: %d\n' % (len(buckets)))

  log_path_list = [log_path]

  if action in ('--csv', '--json'):
    # Search for the sequence of dump files following log_path; the NNNN
    # serial number occupies the 4 digits before the '.heap' suffix.
    n = int(log_path[len(log_path) - 9 : len(log_path) - 5])
    n += 1  # skip current file
    while True:
      p = '%s.%04d.heap' % (prefix, n)
      if os.path.exists(p):
        log_path_list.append(p)
      else:
        break
      n += 1

  logs = [Log(path, buckets) for path in log_path_list]

  sys.stderr.write('getting symbols\n')
  update_symbols(symbol_path, maps_lines, chrome_path)

  # TODO(dmikurube): Many modes now. Split them into separate functions.
  if action == '--stacktrace':
    logs[0].dump_stacktrace(buckets)

  elif action == '--csv':
    sys.stdout.write(','.join(components))
    sys.stdout.write('\n')

    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      s = []
      for c in components:
        if c in ('hour', 'minute', 'second'):
          # Time-series columns are already in their display unit.
          s.append('%05.5f' % (component_sizes[c]))
        else:
          # Byte counts are reported in megabytes.
          s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
      sys.stdout.write(','.join(s))
      sys.stdout.write('\n')

  elif action == '--json':
    json_base = {
      'version': 'JSON_DEEP_1',
      'legends': components,
      'snapshots': [],
    }
    for log in logs:
      component_sizes = log.apply_policy(policy_list, buckets, logs[0].log_time)
      component_sizes['log_path'] = log.log_path
      component_sizes['log_time'] = datetime.fromtimestamp(
          log.log_time).strftime('%Y-%m-%d %H:%M:%S')
      json_base['snapshots'].append(component_sizes)
    json.dump(json_base, sys.stdout, indent=2, sort_keys=True)

  elif action == '--list':
    component_sizes = logs[0].apply_policy(
        policy_list, buckets, logs[0].log_time)
    for c in components:
      if c in ['hour', 'minute', 'second']:
        sys.stdout.write('%30s %10.3f\n' % (c, component_sizes[c]))
      else:
        sys.stdout.write('%30s %10.3f\n' % (
            c, component_sizes[c] / 1024.0 / 1024.0))

  elif action == '--expand':
    component_name = sys.argv[5]
    depth = sys.argv[6]
    logs[0].expand(policy_list, buckets, component_name, int(depth))

  elif action == '--pprof':
    if len(sys.argv) > 5:
      # Optional 5th argument filters the output to one component.
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, sys.argv[5])
    else:
      logs[0].dump_for_pprof(policy_list, buckets, maps_lines, None)
764 | |
765 | |
if __name__ == '__main__':
  # main() returns None on success; sys.exit(None) exits with status 0.
  sys.exit(main())
OLD | NEW |