OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright 2016 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """ | |
7 This script processes trace files and symbolizes stack frames generated by | |
8 Chrome's native heap profiler. This script assumes that the Chrome binary | |
9 referenced in the trace contains symbols, and is the same binary used to emit | |
10 the trace. | |
11 | |
12 === Overview === | |
13 | |
14 Trace file is essentially a giant JSON array of dictionaries (events). | |
15 Events have some predefined keys (e.g. 'pid'), but otherwise are free to | |
16 have anything inside. Trace file contains events from all Chrome processes | |
17 that were sampled during tracing period. | |
18 | |
19 This script cares only about memory dump events generated with memory-infra | |
20 category enabled. | |
21 | |
22 When Chrome native heap profiling is enabled, some memory dump events | |
23 include the following extra information: | |
24 | |
25 * (Per allocator) Information about live allocations at the moment of the | |
26 memory dump (the information includes backtraces, types / categories, | |
27 sizes, and counts of allocations). There are several allocators in | |
28 Chrome: e.g. malloc, blink_gc, partition_alloc. | |
29 | |
30 * (Per process) Stack frame tree of all functions that called allocators | |
31 above. | |
32 | |
33 This script does the following: | |
34 | |
35 1. Parses the given trace file (loads JSON). | |
36 2. Finds memory dump events and parses stack frame tree for each process. | |
37 3. Finds stack frames that have PC addresses instead of function names. | |
38 4. Symbolizes PCs and modifies loaded JSON. | |
39 5. Writes modified JSON back to the file. | |
40 | |
41 The script supports trace files from the following platforms: | |
42 * Android (the script itself must be run on Linux) | |
43 * Linux | |
44 * macOS | |
45 * Windows | |
46 | |
47 Important note - the script doesn't check that it symbolizes same binaries | |
48 that were used at the time trace was taken. I.e. if you take a trace, change | |
49 and rebuild Chrome binaries, the script will blindly use the new binaries. | |
50 | |
51 === Details === | |
52 | |
53 There are two formats of heap profiler information: legacy and modern. The | |
54 main differences relevant to this script are: | |
55 | |
56 * In the modern format the stack frame tree, type name mapping, and string | |
57 mapping nodes are dumped incrementally. These nodes are dumped in each | |
58 memory dump event and carry updates that occurred since the last event. | |
59 | |
60 For example, let's say that when the first memory dump event is generated | |
61 we only know about a function foo() (called from main()) allocating objects | |
62 of type "int": | |
63 | |
64 { | |
65 "args": { | |
66 "dumps": { | |
67 "heaps_v2": { | |
68 "maps": { | |
69 "nodes": [ | |
70 { "id": 1, "name_sid": 1 }, | |
71 { "id": 2, "parent": 1, "name_sid": 3 }, | |
72 ], | |
73 "types": [ | |
74 { "id": 1, "name_sid": 2 }, | |
75 ], | |
76 "strings": [ | |
77 { "id": 1, "string": "main()" }, | |
78 { "id": 2, "string": "int" }, | |
79 { "id": 3, "string": "foo()" }, | |
80 ] | |
81 }, | |
82 "allocators": { ...live allocations per allocator... }, | |
83 ... | |
84 }, | |
85 ... | |
86 } | |
87 }, | |
88 ... | |
89 } | |
90 | |
91 Here: | |
92 * 'nodes' node encodes stack frame tree | |
93 * 'types' node encodes type name mappings | |
94 * 'strings' node encodes string mapping (explained below) | |
95 | |
96  Then, by the time the second memory dump event is generated, we learn about | |
97 bar() (called from main()), which also allocated "int" objects. Only the | |
98 new information is dumped, i.e. bar() stack frame: | |
99 | |
100 { | |
101 "args": { | |
102 "dumps": { | |
103 "heaps_v2": { | |
104 "maps": { | |
105 "nodes": [ | |
106 { "id": 2, "parent": 1, "name_sid": 4 }, | |
107 ], | |
108 "types": [], | |
109 "strings": [ | |
110 { "id": 4, "string": "bar()" }, | |
111 ] | |
112 }, | |
113 "allocators": { ...live allocations per allocator... }, | |
114 ... | |
115 }, | |
116 ... | |
117 } | |
118 }, | |
119 ... | |
120 } | |
121 | |
122 Note that 'types' node is empty, since there were no updates. All three | |
123  nodes ('nodes', 'types', and 'strings') can be empty if there were no updates | |
124 to them. | |
125 | |
126 For simplicity, when the script updates incremental nodes, it puts updated | |
127 content in the first node, and clears all others. I.e. the following stack | |
128 frame nodes: | |
129 | |
130 'nodes': [ | |
131 { "id": 1, "name_sid": 1 }, | |
132 { "id": 2, "parent": 1, "name_sid": 2 }, | |
133 ] | |
134 'nodes': [ | |
135 { "id": 3, "parent": 2, "name_sid": 3 }, | |
136 ] | |
137 'nodes': [ | |
138 { "id": 4, "parent": 3, "name_sid": 4 }, | |
139 { "id": 5, "parent": 1, "name_sid": 5 }, | |
140 ] | |
141 | |
142  After symbolization they are written as: | |
143 | |
144 'nodes': [ | |
145 { "id": 1, "name_sid": 1 }, | |
146 { "id": 2, "parent": 1, "name_sid": 2 }, | |
147 { "id": 3, "parent": 2, "name_sid": 3 }, | |
148 { "id": 4, "parent": 3, "name_sid": 4 }, | |
149 { "id": 5, "parent": 1, "name_sid": 5 }, | |
150 ] | |
151 'nodes': [] | |
152 'nodes': [] | |
153 | |
154 | |
155 * In contrast, in the legacy format stack frame tree and type mappings are | |
156 dumped separately from memory dump events, once per process. | |
157 | |
158  Here is what a trace file with two memory dump events looks like in the | |
159 legacy format: | |
160 | |
161 { | |
162 "args": { | |
163 "dumps": { | |
164 "heaps": { ...live allocations per allocator... }, | |
165 ... | |
166 } | |
167 }, | |
168 ... | |
169 } | |
170 | |
171 { | |
172 "args": { | |
173 "dumps": { | |
174 "heaps": { ...live allocations per allocator... }, | |
175 ... | |
176 } | |
177 }, | |
178 ... | |
179 } | |
180 | |
181 { | |
182 "args": { | |
183 "typeNames": { | |
184 1: "int", | |
185 } | |
186 }, | |
187 "cat": "__metadata", | |
188 "name": "typeNames", | |
189 ... | |
190 } | |
191 | |
192 { | |
193 "args": { | |
194 "stackFrames": { | |
195 1: { "name": "main" }, | |
196 2: { "name": "foo", "parent": 1 }, | |
197 3: { "name": "bar", "parent": 1 }, | |
198 } | |
199 }, | |
200 "cat": "__metadata", | |
201 "name": "stackFrames", | |
202 ... | |
203 } | |
204 | |
205 | |
206 * Another change in the modern format is 'strings' node, which was added | |
207 to deduplicate stack frame names (mainly for trace file size reduction). | |
208 For consistency 'types' node also uses string mappings. | |
209 | |
210 | |
211 See crbug.com/708930 for more information about the modern format. | |
212 """ | |
213 | |
214 import argparse | |
215 import bisect | |
216 import collections | |
217 import gzip | |
218 import itertools | |
219 import json | |
220 import os | |
221 import re | |
222 import shutil | |
223 import subprocess | |
224 import sys | |
225 import tarfile | |
226 import zipfile | |
227 import tempfile | |
228 | |
229 _SYMBOLS_PATH = os.path.abspath(os.path.join( | |
230 os.path.dirname(os.path.realpath(__file__)), | |
231 '..', | |
232 'third_party', | |
233 'symbols')) | |
234 sys.path.append(_SYMBOLS_PATH) | |
235 # pylint: disable=import-error | |
236 import symbols.elf_symbolizer as elf_symbolizer | |
237 | |
238 import symbolize_trace_atos_regex | |
239 import symbolize_trace_macho_reader | |
240 | |
241 _PY_UTILS_PATH = os.path.abspath(os.path.join( | |
242 os.path.dirname(os.path.realpath(__file__)), | |
243 '..', | |
244 '..', | |
245 'common', | |
246 'py_utils')) | |
247 sys.path.append(_PY_UTILS_PATH) | |
248 # pylint: disable=import-error | |
249 import py_utils.cloud_storage as cloud_storage | |
250 | |
class NodeWrapper(object):
  """Base class for wrappers around nodes of a trace event JSON.

  A node is a reference into the loaded trace JSON. Each wrapper parses one
  kind of node into a convenient API and, when asked, writes changes back to
  the underlying JSON (see ApplyModifications() on subclasses).

  As an example, a legacy metadata event carrying a stack frame tree looks
  like this:

    {
      "args": {
        "stackFrames": { ... }
      },
      "cat": "__metadata",
      "name": "stackFrames",
      "ph": "M",
      ...
    }

  When such an event is seen, the "stackFrames" dictionary reference is
  handed to the appropriate wrapper subclass, which knows how to parse and
  update it.

  Parsing follows one of two patterns, depending on whether the node is
  serialized incrementally:

   * Non-incremental nodes are parsed entirely in __init__() (for instance
     MemoryMap).

   * Incremental nodes leave __init__() empty; ParseNext() is invoked each
     time another node (from a subsequent event) is encountered.

  Wrappers that support modification expose two extra pieces of API:

   * A 'modified' flag telling whether the wrapper has pending changes.

   * An 'ApplyModifications' method that writes pending changes back into
     the wrapped nodes and, on success, resets the 'modified' flag.
  """
  pass
295 | |
296 | |
class MemoryMap(NodeWrapper):
  """Wraps 'process_mmaps' node.

  'process_mmaps' node contains information about file mappings.

    "process_mmaps": {
      "vm_regions": [
        {
          "mf": "<file_path>",
          "sa": "<start_address>",
          "sz": "<size>",
          ...
        },
        ...
      ]
    }
  """

  class Region(object):
    """A mapped memory region, ordered by start address.

    Regions also compare against plain integers (treated as a start
    address), which lets bisect search a sorted region list by address.
    """

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """First address past the end of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def _ComparisonKey(self, other):
      """Returns the start address to compare against (Region or int)."""
      if isinstance(other, type(self)):
        return other._start_address
      if isinstance(other, int):
        return other
      raise Exception('Cannot compare with %s' % type(other))

    # Rich comparison methods replace the legacy __cmp__, which Python 3
    # does not support; both list.sort() and bisect rely on these.
    def __eq__(self, other):
      return self._start_address == self._ComparisonKey(other)

    def __ne__(self, other):
      return self._start_address != self._ComparisonKey(other)

    def __lt__(self, other):
      return self._start_address < self._ComparisonKey(other)

    def __le__(self, other):
      return self._start_address <= self._ComparisonKey(other)

    def __gt__(self, other):
      return self._start_address > self._ComparisonKey(other)

    def __ge__(self, other):
      return self._start_address >= self._ComparisonKey(other)

    def __hash__(self):
      # Defined alongside __eq__ (Python 3 would otherwise make the class
      # unhashable); hashes by the same key used for equality.
      return hash(self._start_address)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps_node):
    """Parses 'vm_regions', sorts them, and drops duplicate regions."""
    regions = []
    for region_node in process_mmaps_node['vm_regions']:
      # int() (not the Python 2-only long()) parses arbitrarily large hex
      # values on both Python 2 and Python 3.
      regions.append(self.Region(
          int(region_node['sa'], 16),
          int(region_node['sz'], 16),
          region_node['mf']))
    regions.sort()

    # Copy regions without duplicates and check for overlaps.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if address >= region.start_address and address < region.end_address:
        return region
    return None
389 | |
390 | |
class UnsupportedHeapDumpVersionError(Exception):
  """Raised when a trace contains a heap dump version we cannot handle."""

  def __init__(self, version):
    # Build the human-readable message up front and let Exception store it.
    super(UnsupportedHeapDumpVersionError, self).__init__(
        'Unsupported heap dump version: {}'.format(version))
397 | |
398 | |
class StringMap(NodeWrapper):
  """Wraps all 'strings' nodes for a process.

  'strings' node contains incremental mappings between integer ids and strings.

    "strings": [
      {
        "id": <string_id>,
        "string": <string>
      },
      ...
    ]
  """

  def __init__(self):
    self._modified = False
    self._strings_nodes = []  # All 'strings' nodes seen so far, in order.
    self._string_by_id = {}
    self._id_by_string = {}
    self._max_string_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def string_by_id(self):
    """Returns {id -> string} dict (must not be changed directly)."""
    return self._string_by_id

  def ParseNext(self, heap_dump_version, strings_node):
    """Parses and interns the next 'strings' node (see NodeWrapper).

    Raises:
      UnsupportedHeapDumpVersionError: if |heap_dump_version| is unsupported.
    """
    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._strings_nodes.append(strings_node)
    for string_node in strings_node:
      self._Insert(string_node['id'], string_node['string'])

  def Clear(self):
    """Clears all string mappings (except the reserved 'no entry' id)."""
    if self._string_by_id:
      self._modified = True
      # ID #0 means 'no entry' and must always be present. Carry it over.
      null_string = self._string_by_id[0]
      self._string_by_id = {}
      self._id_by_string = {}
      self._Insert(0, null_string)
      self._max_string_id = 0

  def AddString(self, string):
    """Adds a string (if it doesn't exist) and returns its integer id."""
    string_id = self._id_by_string.get(string)
    if string_id is None:
      string_id = self._max_string_id + 1
      self._Insert(string_id, string)
      self._modified = True
    return string_id

  def ApplyModifications(self):
    """Propagates modifications back to nodes (see NodeWrapper)."""
    if not self.modified:
      return

    assert self._strings_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.
    for strings_node in self._strings_nodes:
      del strings_node[:]
    strings_node = self._strings_nodes[0]
    # items() (not the Python 2-only iteritems()) works on both
    # Python 2 and Python 3.
    for string_id, string in self._string_by_id.items():
      strings_node.append({'id': string_id, 'string': string})

    self._modified = False

  def _Insert(self, string_id, string):
    # Keep forward and reverse maps in sync, and track the largest id so
    # that AddString() can mint fresh ids.
    self._id_by_string[string] = string_id
    self._string_by_id[string_id] = string
    self._max_string_id = max(self._max_string_id, string_id)
480 | |
481 | |
class TypeNameMap(NodeWrapper):
  """Wraps all 'types' nodes for a process.

  'types' nodes encode mappings between integer type ids and integer
  string ids (from 'strings' nodes).

    "types": [
      {
        "id": <type_id>,
        "name_sid": <name_string_id>
      }
      ...
    ]

  For simplicity string ids are translated into strings during parsing,
  and then translated back to ids in ApplyModifications().
  """

  def __init__(self):
    self._modified = False
    self._type_name_nodes = []  # All 'types' nodes seen so far, in order.
    self._name_by_id = {}
    self._id_by_name = {}
    self._max_type_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def name_by_id(self):
    """Returns {id -> name} dict (must not be changed directly)."""
    return self._name_by_id

  def ParseNext(self, heap_dump_version, type_name_node, string_map):
    """Parses and interns the next 'types' node (see NodeWrapper).

    |string_map| - A StringMap object to use to translate string ids
                   to strings.

    Raises:
      UnsupportedHeapDumpVersionError: if |heap_dump_version| is unsupported.
    """
    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._type_name_nodes.append(type_name_node)
    for type_node in type_name_node:
      self._Insert(type_node['id'],
                   string_map.string_by_id[type_node['name_sid']])

  def AddType(self, type_name):
    """Adds a type name (if it doesn't exist) and returns its id."""
    type_id = self._id_by_name.get(type_name)
    if type_id is None:
      type_id = self._max_type_id + 1
      self._Insert(type_id, type_name)
      self._modified = True
    return type_id

  def ApplyModifications(self, string_map, force=False):
    """Propagates modifications back to nodes.

    |string_map| - A StringMap object to use to translate strings to ids.
    |force| - Whether to propagate changes regardless of 'modified' flag.
    """
    if not self.modified and not force:
      return

    assert self._type_name_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.
    for types_node in self._type_name_nodes:
      del types_node[:]
    types_node = self._type_name_nodes[0]
    # items() (not the Python 2-only iteritems()) works on both
    # Python 2 and Python 3.
    for type_id, type_name in self._name_by_id.items():
      types_node.append({
          'id': type_id,
          'name_sid': string_map.AddString(type_name)})

    self._modified = False

  def _Insert(self, type_id, type_name):
    # Keep forward and reverse maps in sync, and track the largest id so
    # that AddType() can mint fresh ids.
    self._id_by_name[type_name] = type_id
    self._name_by_id[type_id] = type_name
    self._max_type_id = max(self._max_type_id, type_id)
566 | |
567 | |
class StackFrameMap(NodeWrapper):
  """Wraps stack frame tree nodes for a process.

  For the legacy format this wrapper expects a single 'stackFrames' node
  (which comes from metadata event):

    "stackFrames": {
      "<frame_id>": {
        "name": "<frame_name>"
        "parent": "<parent_frame_id>"
      },
      ...
    }

  For the modern format this wrapper expects several 'nodes' nodes:

    "nodes": [
      {
        "id": <frame_id>,
        "parent": <parent_frame_id>,
        "name_sid": <name_string_id>
      },
      ...
    ]

  In both formats frame name is a string. Native heap profiler generates
  specially formatted frame names (e.g. "pc:10eb78dba") for function
  addresses (PCs). Inner Frame class below parses name and extracts PC,
  if it's there.
  """

  class Frame(object):
    """A single stack frame: id, name, optional parsed PC, parent id."""

    def __init__(self, frame_id, name, parent_frame_id):
      self._modified = False
      self._id = frame_id
      self._name = name
      self._pc = self._ParsePC(name)
      self._parent_id = parent_frame_id
      self._ext = None

    @property
    def modified(self):
      """Returns True if the frame was modified.

      For example changing frame's name sets this flag (since the change
      needs to be propagated back to nodes).
      """
      return self._modified

    @property
    def id(self):
      """Frame id (integer)."""
      return self._id

    @property
    def pc(self):
      """Parsed (integer) PC of the frame, or None."""
      return self._pc

    @property
    def name(self):
      """Name of the frame (see above)."""
      return self._name

    @name.setter
    def name(self, value):
      """Changes the name. Doesn't affect value of |pc|."""
      self._modified = True
      self._name = value

    @property
    def parent_id(self):
      """Parent frame id (integer)."""
      return self._parent_id

    _PC_TAG = 'pc:'

    def _ParsePC(self, name):
      """Extracts the PC from names like 'pc:10eb78dba'; None otherwise."""
      if not name.startswith(self._PC_TAG):
        return None
      # int() (not the Python 2-only long()) parses arbitrarily large hex
      # values on both Python 2 and Python 3.
      return int(name[len(self._PC_TAG):], 16)

    def _ClearModified(self):
      self._modified = False

  def __init__(self):
    self._modified = False
    self._heap_dump_version = None
    self._stack_frames_nodes = []  # All frame nodes seen so far, in order.
    self._frame_by_id = {}

  @property
  def modified(self):
    """Returns True if the wrapper or any of its frames were modified."""
    return (self._modified or
            any(f.modified for f in self._frame_by_id.values()))

  @property
  def frame_by_id(self):
    """Returns {id -> frame} dict (must not be modified directly)."""
    return self._frame_by_id

  def ParseNext(self, heap_dump_version, stack_frames_node, string_map):
    """Parses the next stack frames node (see NodeWrapper).

    For the modern format |string_map| is used to translate string ids
    to strings.

    Raises:
      UnsupportedHeapDumpVersionError: if |heap_dump_version| is unsupported.
    """
    frame_by_id = {}
    if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      if self._stack_frames_nodes:
        raise Exception('Legacy stack frames node is expected only once.')
      # items() / values() (not the Python 2-only iteritems()/itervalues())
      # work on both Python 2 and Python 3.
      for frame_id, frame_node in stack_frames_node.items():
        frame = self.Frame(frame_id,
                           frame_node['name'],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame
    else:
      if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
        raise UnsupportedHeapDumpVersionError(heap_dump_version)
      for frame_node in stack_frames_node:
        frame = self.Frame(frame_node['id'],
                           string_map.string_by_id[frame_node['name_sid']],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame

    self._heap_dump_version = heap_dump_version
    self._stack_frames_nodes.append(stack_frames_node)
    self._frame_by_id.update(frame_by_id)

  def ApplyModifications(self, string_map, force=False):
    """Applies modifications back to nodes (see NodeWrapper)."""
    if not self.modified and not force:
      return

    assert self._stack_frames_nodes, 'no nodes'
    if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      assert string_map is None, \
          'string_map should not be used with the legacy format'

    # Serialize frames into the first node, clear all others.
    for frames_node in self._stack_frames_nodes:
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frames_node.clear()  # Legacy nodes are dicts.
      else:
        del frames_node[:]   # Modern nodes are lists.

    frames_node = self._stack_frames_nodes[0]
    for frame in self._frame_by_id.values():
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frame_node = {'name': frame.name}
        frames_node[frame.id] = frame_node
      else:
        frame_node = {
            'id': frame.id,
            'name_sid': string_map.AddString(frame.name)
        }
        frames_node.append(frame_node)
      if frame.parent_id is not None:
        frame_node['parent'] = frame.parent_id
      frame._ClearModified()

    self._modified = False
734 | |
735 | |
class Trace(NodeWrapper):
  """Wrapper for the root trace node (i.e. the trace JSON itself).

  This wrapper parses select nodes from memory-infra events and groups
  parsed data per-process (see inner Process class below).
  """

  # Indicates legacy heap dump format.
  HEAP_DUMP_VERSION_LEGACY = 'Legacy'

  # Indicates variation of a modern heap dump format.
  HEAP_DUMP_VERSION_1 = 1

  class Process(object):
    """Collection of per-process data and wrappers."""

    def __init__(self, pid):
      self._pid = pid
      self._name = None
      self._memory_map = None
      self._stack_frame_map = StackFrameMap()
      self._type_name_map = TypeNameMap()
      self._string_map = StringMap()
      self._heap_dump_version = None

    @property
    def modified(self):
      """True if any contained wrapper has changes to write back."""
      return self._stack_frame_map.modified or self._type_name_map.modified

    @property
    def pid(self):
      return self._pid

    @property
    def name(self):
      return self._name

    @property
    def unique_name(self):
      """Returns string that includes both process name and its pid."""
      name = self._name if self._name else 'UnnamedProcess'
      return '{}({})'.format(name, self._pid)

    @property
    def memory_map(self):
      return self._memory_map

    @property
    def stack_frame_map(self):
      return self._stack_frame_map

    @property
    def type_name_map(self):
      return self._type_name_map

    def ApplyModifications(self):
      """Calls ApplyModifications() on contained wrappers."""
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        self._stack_frame_map.ApplyModifications(None)
      else:
        if self._stack_frame_map.modified or self._type_name_map.modified:
          # Rewriting either map changes the set of strings in use, so
          # rebuild the string map from scratch and force-serialize both.
          self._string_map.Clear()
          self._stack_frame_map.ApplyModifications(self._string_map,
                                                   force=True)
          self._type_name_map.ApplyModifications(self._string_map,
                                                 force=True)
          self._string_map.ApplyModifications()

  def __init__(self, trace_node):
    self._trace_node = trace_node
    self._processes = []
    self._heap_dump_version = None
    self._version = None
    self._is_chromium = True
    self._is_64bit = False
    self._is_win = False
    self._is_mac = False

    # Misc per-process information needed only during parsing.
    class ProcessExt(object):
      def __init__(self, pid):
        self.process = Trace.Process(pid)
        self.mapped_entry_names = set()
        self.process_mmaps_node = None
        self.seen_strings_node = False

    process_ext_by_pid = {}

    if isinstance(trace_node, dict):
      metadata = trace_node['metadata']
      product_version = metadata['product-version']
      # product-version has the form "Chrome/60.0.3103.0"
      self._version = product_version.split('/', 1)[-1]

      command_line = metadata['command_line']
      self._is_win = re.search('windows', metadata['os-name'], re.IGNORECASE)
      self._is_mac = re.search('mac', metadata['os-name'], re.IGNORECASE)

      if self._is_win:
        self._is_chromium = (
            not re.search('Chrome SxS\\\\Application\\\\chrome.exe',
                          command_line, re.IGNORECASE) and
            not re.search('Chrome\\\\Application\\\\chrome.exe', command_line,
                          re.IGNORECASE))
      if self._is_mac:
        self._is_chromium = re.search('chromium', command_line, re.IGNORECASE)

      self._is_64bit = (
          re.search('x86_64', metadata['os-arch'], re.IGNORECASE) and
          not re.search('WOW64', metadata['user-agent'], re.IGNORECASE))

    # Android traces produced via 'chrome://inspect/?tracing#devices' are
    # just list of events.
    events = trace_node if isinstance(trace_node, list) \
             else trace_node['traceEvents']
    for event in events:
      name = event.get('name')
      if not name:
        continue

      pid = event['pid']
      process_ext = process_ext_by_pid.get(pid)
      if process_ext is None:
        process_ext = ProcessExt(pid)
        process_ext_by_pid[pid] = process_ext
      process = process_ext.process

      phase = event['ph']
      if phase == self._EVENT_PHASE_METADATA:
        if name == 'process_name':
          process._name = event['args']['name']
        elif name == 'stackFrames':
          process._stack_frame_map.ParseNext(
              self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY),
              event['args']['stackFrames'],
              process._string_map)
      elif phase == self._EVENT_PHASE_MEMORY_DUMP:
        dumps = event['args']['dumps']
        process_mmaps = dumps.get('process_mmaps')
        if process_mmaps:
          # We want the most recent memory map, so parsing happens later
          # once we finished reading all events.
          process_ext.process_mmaps_node = process_mmaps
        heaps = dumps.get('heaps_v2')
        if heaps:
          version = self._UseHeapDumpVersion(heaps['version'])
          maps = heaps.get('maps')
          if maps:
            # Iterating a dict yields its keys on both Python 2 and
            # Python 3 (unlike the Python 2-only iterkeys()).
            process_ext.mapped_entry_names.update(maps)
            types = maps.get('types')
            stack_frames = maps.get('nodes')
            strings = maps.get('strings')
            if (strings is None and (types or stack_frames)
                and not process_ext.seen_strings_node):
              # ApplyModifications() for TypeNameMap and StackFrameMap puts
              # everything into the first node and depends on StringMap. So
              # we need to make sure that 'strings' node is there if any of
              # other two nodes present.
              strings = []
              maps['strings'] = strings
            if strings is not None:
              process_ext.seen_strings_node = True
              process._string_map.ParseNext(version, strings)
            if types:
              process._type_name_map.ParseNext(
                  version, types, process._string_map)
            if stack_frames:
              process._stack_frame_map.ParseNext(
                  version, stack_frames, process._string_map)

    self._processes = []
    # values() (not the Python 2-only itervalues()) works on both
    # Python 2 and Python 3.
    for pe in process_ext_by_pid.values():
      pe.process._heap_dump_version = self._heap_dump_version
      if pe.process_mmaps_node:
        # Now parse the most recent memory map.
        pe.process._memory_map = MemoryMap(pe.process_mmaps_node)
      self._processes.append(pe.process)

  @property
  def node(self):
    """Root node (that was passed to the __init__)."""
    return self._trace_node

  @property
  def modified(self):
    """Returns True if trace file needs to be updated.

    Before writing trace JSON back to a file ApplyModifications() needs
    to be called.
    """
    return any(p.modified for p in self._processes)

  @property
  def processes(self):
    return self._processes

  @property
  def heap_dump_version(self):
    return self._heap_dump_version

  @property
  def version(self):
    return self._version

  @property
  def is_chromium(self):
    return self._is_chromium

  @property
  def is_mac(self):
    return self._is_mac

  @property
  def is_win(self):
    return self._is_win

  @property
  def is_64bit(self):
    return self._is_64bit

  def ApplyModifications(self):
    """Propagates modifications back to the trace JSON."""
    for process in self._processes:
      process.ApplyModifications()
    assert not self.modified, 'still modified'

  # Relevant trace event phases from Chromium's
  # src/base/trace_event/common/trace_event_common.h.
  _EVENT_PHASE_METADATA = 'M'
  _EVENT_PHASE_MEMORY_DUMP = 'v'

  def _UseHeapDumpVersion(self, version):
    """Latches the first heap dump version seen; rejects inconsistencies."""
    if self._heap_dump_version is None:
      self._heap_dump_version = version
      return version
    elif self._heap_dump_version != version:
      raise Exception(
          ("Inconsistent trace file: first saw '{}' heap dump version, "
           "then '{}'.").format(self._heap_dump_version, version))
    else:
      return version
975 | |
976 | |
class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  Acts as the link between ELFSymbolizer and a trace file: it carries the
  addresses to symbolize and the stack frames that receive the resulting
  names.
  """
  def __init__(self, file_path):
    # Relative PC -> list of frames whose name gets the symbolized result.
    self.frames_by_address = collections.defaultdict(list)
    # Path handed to the symbolizer; may be remapped later (see the
    # RemapAndroidFiles / RemapMacFiles / RemapWinFiles helpers).
    self.symbolizable_path = file_path
    # Path exactly as it appears in the trace's memory map.
    self.path = file_path
988 | |
989 | |
def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into list of SymbolizableFiles.

  Each stack-frame PC is looked up in its process' memory map. Frames whose
  PC falls outside every mapped region are named '<unresolved>'; the rest
  are grouped per mapped file, keyed by their file-relative PC.
  """
  symfiles = {}  # file path -> SymbolizableFile
  for proc in processes:
    memory_map = proc.memory_map
    if not memory_map:
      continue
    for frame in proc.stack_frame_map.frame_by_id.itervalues():
      pc = frame.pc
      if pc is None:
        continue
      region = memory_map.FindRegion(pc)
      if region is None:
        # PC is not covered by any mmap'd region - mark it explicitly.
        frame.name = '<unresolved>'
        continue
      symfile = symfiles.get(region.file_path)
      if symfile is None:
        symfile = SymbolizableFile(region.file_path)
        symfiles[region.file_path] = symfile
      symfile.frames_by_address[pc - region.start_address].append(frame)
  return symfiles.values()
1017 | |
1018 | |
def FindInSystemPath(binary_name):
  """Searches $PATH for |binary_name|; returns its full path or None."""
  for directory in os.environ['PATH'].split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    if os.path.isfile(candidate):
      return candidate
  return None
1026 | |
1027 | |
class Symbolizer(object):
  """Encapsulates platform-specific symbolization logic.

  The host platform determines which external symbolizer binary is used:
  'atos' on macOS, 'addr2line-pdb.exe' on Windows, and binutils' 'addr2line'
  elsewhere (the latter is also used for Android traces symbolized on Linux).
  """

  def __init__(self):
    # Note: these flags describe the *host* running this script, not the
    # platform the trace was recorded on (cf. Trace.is_mac / Trace.is_win).
    self.is_mac = sys.platform == 'darwin'
    self.is_win = sys.platform == 'win32'
    if self.is_mac:
      self.binary = 'atos'
      # Parses 'atos' output lines; only needed (and created) on macOS.
      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
    elif self.is_win:
      self.binary = 'addr2line-pdb.exe'
    else:
      self.binary = 'addr2line'
    # None if the symbolizer binary is not found in $PATH.
    self.symbolizer_path = FindInSystemPath(self.binary)

  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
    """Symbolizes |symfile| by driving addr2line through ELFSymbolizer.

    Frames whose address yields no symbol name get |unsymbolized_name|.
    """
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
                                              self.symbolizer_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    for address, frames in symfile.frames_by_address.iteritems():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()


  def _SymbolizeMac(self, symfile):
    """Symbolizes |symfile| by batch-feeding addresses to 'atos'."""
    # atos operates on absolute addresses, so each relative PC is shifted by
    # the __TEXT segment load address read from the Mach-O header.
    load_address = (symbolize_trace_macho_reader.
                    ReadMachOTextLoadAddress(symfile.symbolizable_path))
    assert load_address is not None

    address_os_file, address_file_path = tempfile.mkstemp()
    try:
      with os.fdopen(address_os_file, 'w') as address_file:
        for address in symfile.frames_by_address.iterkeys():
          address_file.write('{:x} '.format(address + load_address))

      cmd = [self.symbolizer_path, '-arch', 'x86_64', '-l',
             '0x%x' % load_address, '-o', symfile.symbolizable_path,
             '-f', address_file_path]
      output_array = subprocess.check_output(cmd).split('\n')

      # atos emits one output line per input address, in input order.
      # Iterating the dict again here yields the same order as the
      # iterkeys() pass that wrote the address file above.
      for i, frames in enumerate(symfile.frames_by_address.itervalues()):
        symbolized_name = self._matcher.Match(output_array[i])
        for frame in frames:
          frame.name = symbolized_name
    finally:
      os.remove(address_file_path)

  def _SymbolizeWin(self, symfile):
    """Invoke symbolizer binary on windows and write all input in one go.

    Unlike linux, on windows, symbolization talks through a shared system
    service that handles communication with the NT symbol servers. This
    creates an explicit serialization (and therefore lock contention) of
    any process using the symbol API for files that do not have a local PDB.

    Thus, even though the windows symbolizer binary can be made command-line
    compatible with the POSIX addr2line interface, parallelizing the
    symbolization does not yield the same performance effects. Running
    just one symbolizer seems good enough for now. Can optimize later
    if this becomes a bottleneck.
    """
    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
           symfile.symbolizable_path]

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                            stderr=sys.stderr)
    addrs = ["%x" % relative_pc for relative_pc in
             symfile.frames_by_address.keys()]
    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
    stdout_data = stdout_data.split('\n')

    # The symbolizer prints results in input order, so stdout_data lines
    # pair up with addrs by index.
    for i, addr in enumerate(addrs):
      for frame in symfile.frames_by_address[int(addr, 16)]:
        # Output of addr2line with --functions is always 2 outputs per
        # symbol, function name followed by source line number. Only grab
        # the function name as line info is not always available.
        frame.name = stdout_data[i * 2]

  def Symbolize(self, symfile, unsymbolized_name):
    """Dispatches to the host-platform-specific symbolization routine."""
    if self.is_mac:
      self._SymbolizeMac(symfile)
    elif self.is_win:
      self._SymbolizeWin(symfile)
    else:
      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)

  def IsSymbolizableFile(self, file_path):
    """Returns True if |file_path| looks like a binary we can symbolize."""
    if self.is_win:
      return extension in ['.dll', '.exe'] if False else (
          extension in ['.dll', '.exe'])
    else:
      result = subprocess.check_output(['file', '-0', file_path])
      type_string = result[result.find('\0') + 1:]
      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
                           type_string, re.DOTALL))
1140 | |
1141 | |
1142 def SymbolizeFiles(symfiles, symbolizer): | |
1143 """Symbolizes each file in the given list of SymbolizableFiles | |
1144 and updates stack frames with symbolization results.""" | |
1145 | |
1146 if not symfiles: | |
1147 print 'Nothing to symbolize.' | |
1148 return | |
1149 | |
1150 print 'Symbolizing...' | |
1151 | |
1152 def _SubPrintf(message, *args): | |
1153 print (' ' + message).format(*args) | |
1154 | |
1155 for symfile in symfiles: | |
1156 unsymbolized_name = '<{}>'.format( | |
1157 symfile.path if symfile.path else 'unnamed') | |
1158 | |
1159 problem = None | |
1160 if not os.path.isabs(symfile.symbolizable_path): | |
1161 problem = 'not a file' | |
1162 elif not os.path.isfile(symfile.symbolizable_path): | |
1163 problem = "file doesn't exist" | |
1164 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): | |
1165 problem = 'file is not symbolizable' | |
1166 if problem: | |
1167 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", | |
1168 len(symfile.frames_by_address), | |
1169 symfile.symbolizable_path, | |
1170 problem) | |
1171 for frames in symfile.frames_by_address.itervalues(): | |
1172 for frame in frames: | |
1173 frame.name = unsymbolized_name | |
1174 continue | |
1175 | |
1176 _SubPrintf('Symbolizing {} PCs from {}...', | |
1177 len(symfile.frames_by_address), | |
1178 symfile.symbolizable_path) | |
1179 | |
1180 symbolizer.Symbolize(symfile, unsymbolized_name) | |
1181 | |
1182 | |
# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so) and incremental-install
# locations (/data/data/<>/incremental-install-files/lib/lib.so).
# Library file name is available via the 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:'
    r'app/[^/]+/lib/[^/]+/|'
    r'app-lib/[^/]+/|'
    r'data/[^/]+/incremental-install-files/lib/'
    r')(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
1195 | |
1196 | |
def HaveFilesFromAndroid(symfiles):
  """Returns True if any symfile path looks like an Android library path."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
1199 | |
1200 | |
def RemapAndroidFiles(symfiles, output_path):
  """Points each Android symfile at the unstripped library in |output_path|."""
  unstripped_dir = os.path.join(output_path, ANDROID_UNSTRIPPED_SUBPATH)
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match is None:
      # Clobber file path to trigger "not a file" problem in SymbolizeFiles().
      # Without this, files won't be symbolized with "file not found" problem,
      # which is not accurate.
      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
    else:
      symfile.symbolizable_path = os.path.join(
          unstripped_dir, match.group('name'))
1213 | |
1214 | |
def RemapMacFiles(symfiles, symbol_base_directory, version):
  """Redirects the Chrome framework symfile to the downloaded dSYM DWARF."""
  dwarf_path = os.path.join(
      symbol_base_directory, version,
      "Google Chrome Framework.dSYM/Contents/Resources/DWARF/"
      "Google Chrome Framework")
  for symfile in symfiles:
    if symfile.path.endswith("Google Chrome Framework"):
      symfile.symbolizable_path = dwarf_path
1224 | |
def RemapWinFiles(symfiles, symbol_base_directory, version, is64bit):
  """Redirects symfiles to downloaded Windows images that have PDBs."""
  arch_folder = "win64" if is64bit else "win"
  symbol_sub_dir = os.path.join(symbol_base_directory,
                                "chrome-" + arch_folder + "-" + version)
  for symfile in symfiles:
    image = os.path.join(symbol_sub_dir, os.path.basename(symfile.path))
    # Remap only when both the image and its .pdb are present locally.
    if os.path.isfile(image) and os.path.isfile(image + ".pdb"):
      symfile.symbolizable_path = image
1234 | |
def Symbolize(options, trace, symbolizer):
  """Symbolizes all stack frames in |trace| using |symbolizer|.

  Groups frame PCs per binary, remaps binary paths to locally available
  symbol files where necessary (Android builds, official Mac / Windows
  builds), then runs the platform symbolizer over them.
  """
  symfiles = ResolveSymbolizableFiles(trace.processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      sys.exit('The trace file appears to be from Android. Please '
               'specify output directory to properly symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))


  if not trace.is_chromium:
    # Official Google Chrome build: symbols were fetched from GCS into
    # options.symbol_base_directory (see main()); point symfiles there.
    if symbolizer.is_mac:
      RemapMacFiles(symfiles, options.symbol_base_directory, trace.version)
    if symbolizer.is_win:
      RemapWinFiles(symfiles, options.symbol_base_directory, trace.version,
                    trace.is_64bit)

  SymbolizeFiles(symfiles, symbolizer)
1255 | |
1256 | |
def OpenTraceFile(file_path, mode):
  """Opens a trace file, transparently handling gzip-compressed traces.

  |mode| is 'r' or 'w'; '.gz' files are opened in binary mode, everything
  else in text mode.
  """
  if file_path.endswith('.gz'):
    return gzip.open(file_path, mode + 'b')
  return open(file_path, mode + 't')
1262 | |
1263 | |
def FetchAndExtractSymbolsMac(symbol_base_directory, version):
  """Downloads and extracts Google Chrome dSYM symbols for macOS from GCS.

  Returns True once symbols are available in the per-version subdirectory
  of |symbol_base_directory|, False if they couldn't be fetched.
  """
  def GetLocalPath(base_dir, version):
    # Local path of the downloaded symbol archive.
    return os.path.join(base_dir, version + ".tar.bz2")
  def GetSymbolsPath(version):
    # GCS object path; the '*' is presumably resolved by cloud_storage -
    # TODO confirm against the cloud_storage module.
    return "desktop-*/" + version + "/mac64/Google Chrome.dSYM.tar.bz2"
  def ExtractSymbolTarFile(symbol_sub_dir, symbol_tar_file):
    os.makedirs(symbol_sub_dir)
    # NOTE(review): tarfile.extractall() trusts archive member paths; this is
    # acceptable only because the archive comes from a trusted GCS bucket.
    with tarfile.open(os.path.expanduser(symbol_tar_file), "r:bz2") as tar:
      tar.extractall(symbol_sub_dir)

  symbol_sub_dir = os.path.join(symbol_base_directory, version)
  if os.path.isdir(symbol_sub_dir):
    # Assume a previous run fully extracted the symbols here.
    return True

  bzip_path = GetLocalPath(symbol_base_directory, version)
  if not os.path.isfile(bzip_path):

    _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
    if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version)):
      print "Can't find symbols on GCS."
      return False
    print "Downloading symbols files from GCS, please wait."
    cloud_storage.Get(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version), bzip_path)

  ExtractSymbolTarFile(symbol_sub_dir, bzip_path)
  return True
1290 | |
1291 | |
1292 def FetchAndExtractSymbolsWin(symbol_base_directory, version, is64bit): | |
1293 def DownloadAndExtractZipFile(zip_path, source, destination): | |
1294 if not os.path.isfile(zip_path): | |
1295 _CLOUD_STORAGE_BUCKET = "chrome-unsigned" | |
1296 if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, source): | |
1297 print "Can't find symbols on GCS." | |
1298 return False | |
1299 print "Downloading symbols files from GCS, please wait." | |
1300 cloud_storage.Get(_CLOUD_STORAGE_BUCKET, source, zip_path) | |
1301 if not os.path.isfile(zip_path): | |
1302 print "Can't download symbols on GCS." | |
1303 return False | |
1304 with zipfile.ZipFile(zip_path, "r") as zip: | |
1305 for member in zip.namelist(): | |
1306 filename = os.path.basename(member) | |
1307 # Skip directories. | |
1308 if not filename: | |
1309 continue | |
1310 # Extract archived files. | |
1311 source = zip.open(member) | |
1312 target = file(os.path.join(symbol_sub_dir, filename), "wb") | |
1313 with source, target: | |
1314 shutil.copyfileobj(source, target) | |
1315 | |
1316 folder = "win64" if is64bit else "win" | |
1317 gcs_folder = "desktop-*/" + version + "/" + folder + "-pgo/" | |
1318 | |
1319 symbol_sub_dir = os.path.join(symbol_base_directory, | |
1320 "chrome-" + folder + "-" + version) | |
1321 if os.path.isdir(symbol_sub_dir): | |
1322 return True | |
1323 | |
1324 os.makedirs(symbol_sub_dir) | |
1325 DownloadAndExtractZipFile( | |
1326 os.path.join(symbol_base_directory, | |
1327 "chrome-" + folder + "-" + version + "-syms.zip"), | |
1328 gcs_folder + "chrome-win32-syms.zip", | |
1329 symbol_sub_dir) | |
1330 DownloadAndExtractZipFile( | |
1331 os.path.join(symbol_base_directory, | |
1332 "chrome-" + folder + "-" + version + ".zip"), | |
1333 gcs_folder + "chrome-" + folder + "-pgo.zip", | |
1334 symbol_sub_dir) | |
1335 | |
1336 return True | |
1337 | |
# Suffix appended to the original trace file name when creating a backup
# copy before the file is overwritten with symbolized data (see main()).
BACKUP_FILE_TAG = '.BACKUP'
1340 | |
1341 def main(): | |
1342 parser = argparse.ArgumentParser() | |
1343 parser.add_argument( | |
1344 'file', | |
1345 help='Trace file to symbolize (.json or .json.gz)') | |
1346 | |
1347 parser.add_argument( | |
1348 '--no-backup', dest='backup', default='true', action='store_false', | |
1349 help="Don't create {} files".format(BACKUP_FILE_TAG)) | |
1350 | |
1351 parser.add_argument( | |
1352 '--output-directory', | |
1353 help='The path to the build output directory, such as out/Debug.') | |
1354 | |
1355 home_dir = os.path.expanduser('~') | |
1356 default_dir = os.path.join(home_dir, "symbols") | |
1357 parser.add_argument( | |
1358 '--symbol-base-directory', | |
1359 default=default_dir, | |
1360 help='Directory where symbols are downloaded and cached.') | |
1361 | |
1362 symbolizer = Symbolizer() | |
1363 if symbolizer.symbolizer_path is None: | |
1364 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) | |
1365 | |
1366 options = parser.parse_args() | |
1367 | |
1368 trace_file_path = options.file | |
1369 | |
1370 print 'Reading trace file...' | |
1371 with OpenTraceFile(trace_file_path, 'r') as trace_file: | |
1372 trace = Trace(json.load(trace_file)) | |
1373 | |
1374 # Perform some sanity checks. | |
1375 if trace.is_win and sys.platform != 'win32': | |
1376 print "Cannot symbolize a windows trace on this architecture!" | |
1377 return False | |
1378 | |
1379 # If the trace is from Chromium, assume that symbols are already present. | |
1380 # Otherwise the trace is from Google Chrome. Assume that this is not a local | |
1381 # build of Google Chrome with symbols, and that we need to fetch symbols | |
1382 # from gcs. | |
1383 if not trace.is_chromium: | |
1384 has_symbols = False | |
1385 if symbolizer.is_mac: | |
1386 has_symbols = FetchAndExtractSymbolsMac(options.symbol_base_directory, | |
1387 trace.version) | |
1388 if symbolizer.is_win: | |
1389 has_symbols = FetchAndExtractSymbolsWin(options.symbol_base_directory, | |
1390 trace.version, trace.is_64bit) | |
1391 if not has_symbols: | |
1392 print 'Cannot fetch symbols from GCS' | |
1393 return False | |
1394 | |
1395 Symbolize(options, trace, symbolizer) | |
1396 | |
1397 if trace.modified: | |
1398 trace.ApplyModifications() | |
1399 | |
1400 if options.backup: | |
1401 backup_file_path = trace_file_path + BACKUP_FILE_TAG | |
1402 print 'Backing up trace file to {}'.format(backup_file_path) | |
1403 os.rename(trace_file_path, backup_file_path) | |
1404 | |
1405 print 'Updating the trace file...' | |
1406 with OpenTraceFile(trace_file_path, 'w') as trace_file: | |
1407 json.dump(trace.node, trace_file) | |
1408 else: | |
1409 print 'No modifications were made - not updating the trace file.' | |
1410 | |
1411 | |
# Script entry point; main() reads arguments from sys.argv via argparse.
if __name__ == '__main__':
  main()
OLD | NEW |