| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """ | |
| 7 This script processes trace files and symbolizes stack frames generated by | |
| 8 Chrome's native heap profiler. This script assumes that the Chrome binary | |
| 9 referenced in the trace contains symbols, and is the same binary used to emit | |
| 10 the trace. | |
| 11 | |
| 12 === Overview === | |
| 13 | |
| 14 Trace file is essentially a giant JSON array of dictionaries (events). | |
| 15 Events have some predefined keys (e.g. 'pid'), but otherwise are free to | |
| 16 have anything inside. Trace file contains events from all Chrome processes | |
| 17 that were sampled during tracing period. | |
| 18 | |
| 19 This script cares only about memory dump events generated with memory-infra | |
| 20 category enabled. | |
| 21 | |
| 22 When Chrome native heap profiling is enabled, some memory dump events | |
| 23 include the following extra information: | |
| 24 | |
| 25 * (Per allocator) Information about live allocations at the moment of the | |
| 26 memory dump (the information includes backtraces, types / categories, | |
| 27 sizes, and counts of allocations). There are several allocators in | |
| 28 Chrome: e.g. malloc, blink_gc, partition_alloc. | |
| 29 | |
| 30 * (Per process) Stack frame tree of all functions that called allocators | |
| 31 above. | |
| 32 | |
| 33 This script does the following: | |
| 34 | |
| 35 1. Parses the given trace file (loads JSON). | |
| 36 2. Finds memory dump events and parses stack frame tree for each process. | |
| 37 3. Finds stack frames that have PC addresses instead of function names. | |
| 38 4. Symbolizes PCs and modifies loaded JSON. | |
| 39 5. Writes modified JSON back to the file. | |
| 40 | |
| 41 The script supports trace files from the following platforms: | |
| 42 * Android (the script itself must be run on Linux) | |
| 43 * Linux | |
| 44 * macOS | |
| 45 * Windows | |
| 46 | |
| 47 Important note - the script doesn't check that it symbolizes the same | |
| 48 binaries that were used when the trace was taken. I.e. if you take a trace, change | |
| 49 and rebuild Chrome binaries, the script will blindly use the new binaries. | |
| 50 | |
| 51 === Details === | |
| 52 | |
| 53 There are two formats of heap profiler information: legacy and modern. The | |
| 54 main differences relevant to this script are: | |
| 55 | |
| 56 * In the modern format the stack frame tree, type name mapping, and string | |
| 57 mapping nodes are dumped incrementally. These nodes are dumped in each | |
| 58 memory dump event and carry updates that occurred since the last event. | |
| 59 | |
| 60 For example, let's say that when the first memory dump event is generated | |
| 61 we only know about a function foo() (called from main()) allocating objects | |
| 62 of type "int": | |
| 63 | |
| 64 { | |
| 65 "args": { | |
| 66 "dumps": { | |
| 67 "heaps_v2": { | |
| 68 "maps": { | |
| 69 "nodes": [ | |
| 70 { "id": 1, "name_sid": 1 }, | |
| 71 { "id": 2, "parent": 1, "name_sid": 3 }, | |
| 72 ], | |
| 73 "types": [ | |
| 74 { "id": 1, "name_sid": 2 }, | |
| 75 ], | |
| 76 "strings": [ | |
| 77 { "id": 1, "string": "main()" }, | |
| 78 { "id": 2, "string": "int" }, | |
| 79 { "id": 3, "string": "foo()" }, | |
| 80 ] | |
| 81 }, | |
| 82 "allocators": { ...live allocations per allocator... }, | |
| 83 ... | |
| 84 }, | |
| 85 ... | |
| 86 } | |
| 87 }, | |
| 88 ... | |
| 89 } | |
| 90 | |
| 91 Here: | |
| 92 * 'nodes' node encodes stack frame tree | |
| 93 * 'types' node encodes type name mappings | |
| 94 * 'strings' node encodes string mapping (explained below) | |
| 95 | |
| 96 Then, by the time the second memory dump event is generated, we learn about | |
| 97 bar() (called from main()), which also allocated "int" objects. Only the | |
| 98 new information is dumped, i.e. bar() stack frame: | |
| 99 | |
| 100 { | |
| 101 "args": { | |
| 102 "dumps": { | |
| 103 "heaps_v2": { | |
| 104 "maps": { | |
| 105 "nodes": [ | |
| 106 { "id": 2, "parent": 1, "name_sid": 4 }, | |
| 107 ], | |
| 108 "types": [], | |
| 109 "strings": [ | |
| 110 { "id": 4, "string": "bar()" }, | |
| 111 ] | |
| 112 }, | |
| 113 "allocators": { ...live allocations per allocator... }, | |
| 114 ... | |
| 115 }, | |
| 116 ... | |
| 117 } | |
| 118 }, | |
| 119 ... | |
| 120 } | |
| 121 | |
| 122 Note that 'types' node is empty, since there were no updates. All three | |
| 123 nodes ('nodes', 'types', and 'strings') can be empty if there were no updates | |
| 124 to them. | |
| 125 | |
| 126 For simplicity, when the script updates incremental nodes, it puts updated | |
| 127 content in the first node, and clears all others. I.e. the following stack | |
| 128 frame nodes: | |
| 129 | |
| 130 'nodes': [ | |
| 131 { "id": 1, "name_sid": 1 }, | |
| 132 { "id": 2, "parent": 1, "name_sid": 2 }, | |
| 133 ] | |
| 134 'nodes': [ | |
| 135 { "id": 3, "parent": 2, "name_sid": 3 }, | |
| 136 ] | |
| 137 'nodes': [ | |
| 138 { "id": 4, "parent": 3, "name_sid": 4 }, | |
| 139 { "id": 5, "parent": 1, "name_sid": 5 }, | |
| 140 ] | |
| 141 | |
| 142 After symbolization they are written as: | |
| 143 | |
| 144 'nodes': [ | |
| 145 { "id": 1, "name_sid": 1 }, | |
| 146 { "id": 2, "parent": 1, "name_sid": 2 }, | |
| 147 { "id": 3, "parent": 2, "name_sid": 3 }, | |
| 148 { "id": 4, "parent": 3, "name_sid": 4 }, | |
| 149 { "id": 5, "parent": 1, "name_sid": 5 }, | |
| 150 ] | |
| 151 'nodes': [] | |
| 152 'nodes': [] | |
| 153 | |
| 154 | |
| 155 * In contrast, in the legacy format stack frame tree and type mappings are | |
| 156 dumped separately from memory dump events, once per process. | |
| 157 | |
| 158 Here is how a trace file with two memory dump events looks in the | |
| 159 legacy format: | |
| 160 | |
| 161 { | |
| 162 "args": { | |
| 163 "dumps": { | |
| 164 "heaps": { ...live allocations per allocator... }, | |
| 165 ... | |
| 166 } | |
| 167 }, | |
| 168 ... | |
| 169 } | |
| 170 | |
| 171 { | |
| 172 "args": { | |
| 173 "dumps": { | |
| 174 "heaps": { ...live allocations per allocator... }, | |
| 175 ... | |
| 176 } | |
| 177 }, | |
| 178 ... | |
| 179 } | |
| 180 | |
| 181 { | |
| 182 "args": { | |
| 183 "typeNames": { | |
| 184 1: "int", | |
| 185 } | |
| 186 }, | |
| 187 "cat": "__metadata", | |
| 188 "name": "typeNames", | |
| 189 ... | |
| 190 } | |
| 191 | |
| 192 { | |
| 193 "args": { | |
| 194 "stackFrames": { | |
| 195 1: { "name": "main" }, | |
| 196 2: { "name": "foo", "parent": 1 }, | |
| 197 3: { "name": "bar", "parent": 1 }, | |
| 198 } | |
| 199 }, | |
| 200 "cat": "__metadata", | |
| 201 "name": "stackFrames", | |
| 202 ... | |
| 203 } | |
| 204 | |
| 205 | |
| 206 * Another change in the modern format is 'strings' node, which was added | |
| 207 to deduplicate stack frame names (mainly for trace file size reduction). | |
| 208 For consistency 'types' node also uses string mappings. | |
| 209 | |
| 210 | |
| 211 See crbug.com/708930 for more information about the modern format. | |
| 212 """ | |
| 213 | |
import argparse
import bisect
import collections
import gzip
import itertools
import json
import numbers
import os
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import zipfile
| 228 | |
| 229 _SYMBOLS_PATH = os.path.abspath(os.path.join( | |
| 230 os.path.dirname(os.path.realpath(__file__)), | |
| 231 '..', | |
| 232 'third_party', | |
| 233 'symbols')) | |
| 234 sys.path.append(_SYMBOLS_PATH) | |
| 235 # pylint: disable=import-error | |
| 236 import symbols.elf_symbolizer as elf_symbolizer | |
| 237 | |
| 238 import symbolize_trace_atos_regex | |
| 239 import symbolize_trace_macho_reader | |
| 240 | |
| 241 _PY_UTILS_PATH = os.path.abspath(os.path.join( | |
| 242 os.path.dirname(os.path.realpath(__file__)), | |
| 243 '..', | |
| 244 '..', | |
| 245 'common', | |
| 246 'py_utils')) | |
| 247 sys.path.append(_PY_UTILS_PATH) | |
| 248 # pylint: disable=import-error | |
| 249 import py_utils.cloud_storage as cloud_storage | |
| 250 | |
class NodeWrapper(object):
  """Base marker class for wrappers around trace event JSON nodes.

  A node is a reference into the loaded trace JSON. Wrapper classes parse
  nodes into convenient APIs and, when asked, write accumulated changes
  back into the underlying JSON (see ApplyModifications() below).

  For example, a legacy metadata event carrying a stack frame tree looks
  like this:

    {
      "args": {
        "stackFrames": { ... }
      },
      "cat": "__metadata",
      "name": "stackFrames",
      "ph": "M",
      ...
    }

  When such an event is encountered, a reference to its "stackFrames"
  dictionary is handed to the wrapper class that knows how to parse and
  update that dictionary.

  Parsing follows one of two patterns, depending on whether the wrapped
  node is serialized incrementally:

  * Non-incremental nodes are parsed right away in __init__()
    (see MemoryMap for an example).

  * Incremental nodes are parsed piece by piece via ParseNext(), which is
    invoked each time the next node (from a subsequent event) is seen;
    __init__() does nothing.

  Wrappers that support editing the nodes they parsed additionally expose:

  * 'modified' - a flag telling whether the wrapper has changes that have
    not yet been written back.

  * 'ApplyModifications()' - writes changes back to the wrapped nodes;
    a successful invocation resets the 'modified' flag.
  """
  pass
| 295 | |
| 296 | |
class MemoryMap(NodeWrapper):
  """Wraps 'process_mmaps' node.

  'process_mmaps' node contains information about file mappings.

  "process_mmaps": {
    "vm_regions": [
      {
        "mf": "<file_path>",
        "sa": "<start_address>",
        "sz": "<size>",
        ...
      },
      ...
    ]
  }
  """

  class Region(object):
    """A single [start_address, end_address) mapping of a file.

    Regions order themselves by start address and also compare against
    plain integers, so a sorted list of regions works directly with
    sort() and bisect (see FindRegion()).
    """

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """End address (exclusive) of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def _OtherStartAddress(self, other):
      """Returns the start address to compare against, given a Region or
      an integer. Raises for any other type."""
      if isinstance(other, type(self)):
        return other._start_address
      # numbers.Integral covers both int and (Python 2) long.
      if isinstance(other, numbers.Integral):
        return other
      raise Exception('Cannot compare with %s' % type(other))

    # Rich comparison methods instead of Python 2-only __cmp__: __cmp__ is
    # ignored on Python 3, which would silently break sort(), the
    # duplicate-detection '==' below, and bisect in FindRegion().
    def __eq__(self, other):
      return self._start_address == self._OtherStartAddress(other)

    def __ne__(self, other):
      return self._start_address != self._OtherStartAddress(other)

    def __lt__(self, other):
      return self._start_address < self._OtherStartAddress(other)

    def __gt__(self, other):
      return self._start_address > self._OtherStartAddress(other)

    def __le__(self, other):
      return self._start_address <= self._OtherStartAddress(other)

    def __ge__(self, other):
      return self._start_address >= self._OtherStartAddress(other)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps_node):
    """Parses 'vm_regions', sorting regions and dropping duplicates.

    Raises AssertionError if two distinct regions overlap.
    """
    regions = []
    for region_node in process_mmaps_node['vm_regions']:
      # int() (unlike Python 2-only long()) parses arbitrary-precision
      # values on both Python 2 and Python 3.
      regions.append(self.Region(
          int(region_node['sa'], 16),
          int(region_node['sz'], 16),
          region_node['mf']))
    regions.sort()

    # Copy regions without duplicates and check for overlaps. Note that
    # equality here means 'same start address' (see Region.__eq__).
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    """Sorted list of Region objects (must not be modified directly)."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""

    # Regions compare against integers, so bisect can search directly.
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if address >= region.start_address and address < region.end_address:
        return region
    return None
| 389 | |
| 390 | |
class UnsupportedHeapDumpVersionError(Exception):
  """Raised when a trace carries a heap dump version we cannot handle."""

  def __init__(self, version):
    super(UnsupportedHeapDumpVersionError, self).__init__(
        'Unsupported heap dump version: {}'.format(version))
| 397 | |
| 398 | |
class StringMap(NodeWrapper):
  """Wraps all 'strings' nodes for a process.

  'strings' node contains incremental mappings between integer ids and strings.

  "strings": [
    {
      "id": <string_id>,
      "string": <string>
    },
    ...
  ]
  """

  def __init__(self):
    self._modified = False
    # All 'strings' nodes seen so far; ApplyModifications() serializes into
    # the first one and empties the rest.
    self._strings_nodes = []
    self._string_by_id = {}
    self._id_by_string = {}
    self._max_string_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def string_by_id(self):
    """Returns {id -> string} dict (must not be changed directly)."""
    return self._string_by_id

  def ParseNext(self, heap_dump_version, strings_node):
    """Parses and interns next node (see NodeWrapper).

    Raises UnsupportedHeapDumpVersionError for unknown versions.
    """

    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._strings_nodes.append(strings_node)
    for string_node in strings_node:
      self._Insert(string_node['id'], string_node['string'])

  def Clear(self):
    """Clears all string mappings (keeping the reserved 'no entry' id #0)."""
    if self._string_by_id:
      self._modified = True
      # ID #0 means 'no entry' and must always be present. Carry it over.
      null_string = self._string_by_id[0]
      self._string_by_id = {}
      self._id_by_string = {}
      self._Insert(0, null_string)
      self._max_string_id = 0

  def AddString(self, string):
    """Adds a string (if it doesn't exist) and returns its integer id."""
    string_id = self._id_by_string.get(string)
    if string_id is None:
      string_id = self._max_string_id + 1
      self._Insert(string_id, string)
      self._modified = True
    return string_id

  def ApplyModifications(self):
    """Propagates modifications back to nodes (see NodeWrapper)."""
    if not self.modified:
      return

    assert self._strings_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.

    for strings_node in self._strings_nodes:
      del strings_node[:]
    strings_node = self._strings_nodes[0]
    # items() (unlike the Python 2-only iteritems()) works on both
    # Python 2 and Python 3.
    for string_id, string in self._string_by_id.items():
      strings_node.append({'id': string_id, 'string': string})

    self._modified = False

  def _Insert(self, string_id, string):
    # Keep both directions of the mapping and the high-water id in sync.
    self._id_by_string[string] = string_id
    self._string_by_id[string_id] = string
    self._max_string_id = max(self._max_string_id, string_id)
| 480 | |
| 481 | |
class TypeNameMap(NodeWrapper):
  """Wraps all 'types' nodes for a process.

  'types' nodes encode mappings between integer type ids and integer
  string ids (from 'strings' nodes).

  "types": [
    {
      "id": <type_id>,
      "name_sid": <name_string_id>
    }
    ...
  ]

  For simplicity string ids are translated into strings during parsing,
  and then translated back to ids in ApplyModifications().
  """
  def __init__(self):
    self._modified = False
    # All 'types' nodes seen so far; ApplyModifications() serializes into
    # the first one and empties the rest.
    self._type_name_nodes = []
    self._name_by_id = {}
    self._id_by_name = {}
    self._max_type_id = 0

  @property
  def modified(self):
    """Returns True if the wrapper was modified (see NodeWrapper)."""
    return self._modified

  @property
  def name_by_id(self):
    """Returns {id -> name} dict (must not be changed directly)."""
    return self._name_by_id

  def ParseNext(self, heap_dump_version, type_name_node, string_map):
    """Parses and interns next node (see NodeWrapper).

    |string_map| - A StringMap object to use to translate string ids
    to strings.
    """
    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
      raise UnsupportedHeapDumpVersionError(heap_dump_version)

    self._type_name_nodes.append(type_name_node)
    for type_node in type_name_node:
      self._Insert(type_node['id'],
                   string_map.string_by_id[type_node['name_sid']])

  def AddType(self, type_name):
    """Adds a type name (if it doesn't exist) and returns its id."""
    type_id = self._id_by_name.get(type_name)
    if type_id is None:
      type_id = self._max_type_id + 1
      self._Insert(type_id, type_name)
      self._modified = True
    return type_id

  def ApplyModifications(self, string_map, force=False):
    """Propagates modifications back to nodes.

    |string_map| - A StringMap object to use to translate strings to ids.
    |force| - Whether to propagate changes regardless of 'modified' flag.
    """
    if not self.modified and not force:
      return

    assert self._type_name_nodes, 'no nodes'

    # Serialize into the first node, and clear all others.

    for types_node in self._type_name_nodes:
      del types_node[:]
    types_node = self._type_name_nodes[0]
    # items() (unlike the Python 2-only iteritems()) works on both
    # Python 2 and Python 3.
    for type_id, type_name in self._name_by_id.items():
      types_node.append({
          'id': type_id,
          'name_sid': string_map.AddString(type_name)})

    self._modified = False

  def _Insert(self, type_id, type_name):
    # Keep both directions of the mapping and the high-water id in sync.
    self._id_by_name[type_name] = type_id
    self._name_by_id[type_id] = type_name
    self._max_type_id = max(self._max_type_id, type_id)
| 566 | |
| 567 | |
class StackFrameMap(NodeWrapper):
  """ Wraps stack frame tree nodes for a process.

  For the legacy format this wrapper expects a single 'stackFrames' node
  (which comes from metadata event):

  "stackFrames": {
    "<frame_id>": {
      "name": "<frame_name>"
      "parent": "<parent_frame_id>"
    },
    ...
  }

  For the modern format this wrapper expects several 'nodes' nodes:

  "nodes": [
    {
      "id": <frame_id>,
      "parent": <parent_frame_id>,
      "name_sid": <name_string_id>
    },
    ...
  ]

  In both formats frame name is a string. Native heap profiler generates
  specially formatted frame names (e.g. "pc:10eb78dba") for function
  addresses (PCs). Inner Frame class below parses name and extracts PC,
  if it's there.
  """
  class Frame(object):
    """A single stack frame: id, display name, optional PC, parent link."""

    def __init__(self, frame_id, name, parent_frame_id):
      self._modified = False
      self._id = frame_id
      self._name = name
      self._pc = self._ParsePC(name)
      self._parent_id = parent_frame_id
      self._ext = None

    @property
    def modified(self):
      """Returns True if the frame was modified.

      For example changing frame's name sets this flag (since the change
      needs to be propagated back to nodes).
      """
      return self._modified

    @property
    def id(self):
      """Frame id (integer)."""
      return self._id

    @property
    def pc(self):
      """Parsed (integer) PC of the frame, or None."""
      return self._pc

    @property
    def name(self):
      """Name of the frame (see above)."""
      return self._name

    @name.setter
    def name(self, value):
      """Changes the name. Doesn't affect value of |pc|."""
      self._modified = True
      self._name = value

    @property
    def parent_id(self):
      """Parent frame id (integer)."""
      return self._parent_id

    _PC_TAG = 'pc:'

    def _ParsePC(self, name):
      """Returns the integer PC from a 'pc:<hex>' name, or None."""
      if not name.startswith(self._PC_TAG):
        return None
      # int() (unlike Python 2-only long()) parses arbitrary-precision
      # values on both Python 2 and Python 3.
      return int(name[len(self._PC_TAG):], 16)

    def _ClearModified(self):
      self._modified = False

  def __init__(self):
    self._modified = False
    self._heap_dump_version = None
    # All stack frame nodes seen so far; ApplyModifications() serializes
    # into the first one and empties the rest.
    self._stack_frames_nodes = []
    self._frame_by_id = {}

  @property
  def modified(self):
    """Returns True if the wrapper or any of its frames were modified."""
    # values() (unlike the Python 2-only itervalues()) works on both
    # Python 2 and Python 3.
    return (self._modified or
            any(f.modified for f in self._frame_by_id.values()))

  @property
  def frame_by_id(self):
    """Returns {id -> frame} dict (must not be modified directly)."""
    return self._frame_by_id

  def ParseNext(self, heap_dump_version, stack_frames_node, string_map):
    """Parses the next stack frames node (see NodeWrapper).

    For the modern format |string_map| is used to translate string ids
    to strings.
    """

    frame_by_id = {}
    if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      if self._stack_frames_nodes:
        raise Exception('Legacy stack frames node is expected only once.')
      for frame_id, frame_node in stack_frames_node.items():
        frame = self.Frame(frame_id,
                           frame_node['name'],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame
    else:
      if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
        raise UnsupportedHeapDumpVersionError(heap_dump_version)
      for frame_node in stack_frames_node:
        frame = self.Frame(frame_node['id'],
                           string_map.string_by_id[frame_node['name_sid']],
                           frame_node.get('parent'))
        frame_by_id[frame.id] = frame

    self._heap_dump_version = heap_dump_version
    self._stack_frames_nodes.append(stack_frames_node)

    self._frame_by_id.update(frame_by_id)

  def ApplyModifications(self, string_map, force=False):
    """Applies modifications back to nodes (see NodeWrapper)."""

    if not self.modified and not force:
      return

    assert self._stack_frames_nodes, 'no nodes'
    if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
      assert string_map is None, \
          'string_map should not be used with the legacy format'

    # Serialize frames into the first node, clear all others.

    for frames_node in self._stack_frames_nodes:
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frames_node.clear()  # Legacy node is a dict.
      else:
        del frames_node[:]   # Modern node is a list.

    frames_node = self._stack_frames_nodes[0]
    for frame in self._frame_by_id.values():
      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
        frame_node = {'name': frame.name}
        frames_node[frame.id] = frame_node
      else:
        frame_node = {
            'id': frame.id,
            'name_sid': string_map.AddString(frame.name)
        }
        frames_node.append(frame_node)
      if frame.parent_id is not None:
        frame_node['parent'] = frame.parent_id
      frame._ClearModified()

    self._modified = False
| 734 | |
| 735 | |
class Trace(NodeWrapper):
  """Wrapper for the root trace node (i.e. the trace JSON itself).

  This wrapper parses select nodes from memory-infra events and groups
  parsed data per-process (see inner Process class below).
  """

  # Indicates legacy heap dump format ('stackFrames' / 'typeNames'
  # metadata events).
  HEAP_DUMP_VERSION_LEGACY = 'Legacy'

  # Indicates variation of a modern heap dump format ('heaps_v2' nodes).
  HEAP_DUMP_VERSION_1 = 1
| 748 | |
| 749 class Process(object): | |
| 750 """Collection of per-process data and wrappers.""" | |
| 751 | |
| 752 def __init__(self, pid): | |
| 753 self._pid = pid | |
| 754 self._name = None | |
| 755 self._memory_map = None | |
| 756 self._stack_frame_map = StackFrameMap() | |
| 757 self._type_name_map = TypeNameMap() | |
| 758 self._string_map = StringMap() | |
| 759 self._heap_dump_version = None | |
| 760 | |
| 761 @property | |
| 762 def modified(self): | |
| 763 return self._stack_frame_map.modified or self._type_name_map.modified | |
| 764 | |
| 765 @property | |
| 766 def pid(self): | |
| 767 return self._pid | |
| 768 | |
| 769 @property | |
| 770 def name(self): | |
| 771 return self._name | |
| 772 | |
| 773 @property | |
| 774 def unique_name(self): | |
| 775 """Returns string that includes both process name and its pid.""" | |
| 776 name = self._name if self._name else 'UnnamedProcess' | |
| 777 return '{}({})'.format(name, self._pid) | |
| 778 | |
| 779 @property | |
| 780 def memory_map(self): | |
| 781 return self._memory_map | |
| 782 | |
| 783 @property | |
| 784 def stack_frame_map(self): | |
| 785 return self._stack_frame_map | |
| 786 | |
| 787 @property | |
| 788 def type_name_map(self): | |
| 789 return self._type_name_map | |
| 790 | |
| 791 def ApplyModifications(self): | |
| 792 """Calls ApplyModifications() on contained wrappers.""" | |
| 793 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: | |
| 794 self._stack_frame_map.ApplyModifications(None) | |
| 795 else: | |
| 796 if self._stack_frame_map.modified or self._type_name_map.modified: | |
| 797 self._string_map.Clear() | |
| 798 self._stack_frame_map.ApplyModifications(self._string_map, force=True) | |
| 799 self._type_name_map.ApplyModifications(self._string_map, force=True) | |
| 800 self._string_map.ApplyModifications() | |
| 801 | |
| 802 def __init__(self, trace_node): | |
| 803 self._trace_node = trace_node | |
| 804 self._processes = [] | |
| 805 self._heap_dump_version = None | |
| 806 self._version = None | |
| 807 self._is_chromium = True | |
| 808 self._is_64bit = False | |
| 809 self._is_win = False | |
| 810 self._is_mac = False | |
| 811 | |
| 812 # Misc per-process information needed only during parsing. | |
| 813 class ProcessExt(object): | |
| 814 def __init__(self, pid): | |
| 815 self.process = Trace.Process(pid) | |
| 816 self.mapped_entry_names = set() | |
| 817 self.process_mmaps_node = None | |
| 818 self.seen_strings_node = False | |
| 819 | |
| 820 process_ext_by_pid = {} | |
| 821 | |
| 822 if isinstance(trace_node, dict): | |
| 823 metadata = trace_node['metadata'] | |
| 824 product_version = metadata['product-version'] | |
| 825 # product-version has the form "Chrome/60.0.3103.0" | |
| 826 self._version = product_version.split('/', 1)[-1] | |
| 827 | |
| 828 command_line = metadata['command_line'] | |
| 829 self._is_win = re.search('windows', metadata['os-name'] , re.IGNORECASE) | |
| 830 self._is_mac = re.search('mac', metadata['os-name'] , re.IGNORECASE) | |
| 831 | |
| 832 if self._is_win: | |
| 833 self._is_chromium = ( | |
| 834 not re.search('Chrome SxS\\\\Application\\\\chrome.exe', command_line, | |
| 835 re.IGNORECASE) and | |
| 836 not re.search('Chrome\\\\Application\\\\chrome.exe', command_line, | |
| 837 re.IGNORECASE)) | |
| 838 if self._is_mac: | |
| 839 self._is_chromium = re.search('chromium', command_line, re.IGNORECASE) | |
| 840 | |
| 841 self._is_64bit = ( | |
| 842 re.search('x86_64', metadata['os-arch'] , re.IGNORECASE) and | |
| 843 not re.search('WOW64', metadata['user-agent'] , re.IGNORECASE)) | |
| 844 | |
| 845 # Android traces produced via 'chrome://inspect/?tracing#devices' are | |
| 846 # just list of events. | |
| 847 events = trace_node if isinstance(trace_node, list) \ | |
| 848 else trace_node['traceEvents'] | |
| 849 for event in events: | |
| 850 name = event.get('name') | |
| 851 if not name: | |
| 852 continue | |
| 853 | |
| 854 pid = event['pid'] | |
| 855 process_ext = process_ext_by_pid.get(pid) | |
| 856 if process_ext is None: | |
| 857 process_ext = ProcessExt(pid) | |
| 858 process_ext_by_pid[pid] = process_ext | |
| 859 process = process_ext.process | |
| 860 | |
| 861 phase = event['ph'] | |
| 862 if phase == self._EVENT_PHASE_METADATA: | |
| 863 if name == 'process_name': | |
| 864 process._name = event['args']['name'] | |
| 865 elif name == 'stackFrames': | |
| 866 process._stack_frame_map.ParseNext( | |
| 867 self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), | |
| 868 event['args']['stackFrames'], | |
| 869 process._string_map) | |
| 870 elif phase == self._EVENT_PHASE_MEMORY_DUMP: | |
| 871 dumps = event['args']['dumps'] | |
| 872 process_mmaps = dumps.get('process_mmaps') | |
| 873 if process_mmaps: | |
| 874 # We want the most recent memory map, so parsing happens later | |
| 875 # once we finished reading all events. | |
| 876 process_ext.process_mmaps_node = process_mmaps | |
| 877 heaps = dumps.get('heaps_v2') | |
| 878 if heaps: | |
| 879 version = self._UseHeapDumpVersion(heaps['version']) | |
| 880 maps = heaps.get('maps') | |
| 881 if maps: | |
| 882 process_ext.mapped_entry_names.update(maps.iterkeys()) | |
| 883 types = maps.get('types') | |
| 884 stack_frames = maps.get('nodes') | |
| 885 strings = maps.get('strings') | |
| 886 if (strings is None and (types or stack_frames) | |
| 887 and not process_ext.seen_strings_node): | |
| 888 # ApplyModifications() for TypeNameMap and StackFrameMap puts | |
| 889 # everything into the first node and depends on StringMap. So | |
| 890 # we need to make sure that 'strings' node is there if any of | |
| 891 # other two nodes present. | |
| 892 strings = [] | |
| 893 maps['strings'] = strings | |
| 894 if strings is not None: | |
| 895 process_ext.seen_strings_node = True | |
| 896 process._string_map.ParseNext(version, strings) | |
| 897 if types: | |
| 898 process._type_name_map.ParseNext( | |
| 899 version, types, process._string_map) | |
| 900 if stack_frames: | |
| 901 process._stack_frame_map.ParseNext( | |
| 902 version, stack_frames, process._string_map) | |
| 903 | |
| 904 self._processes = [] | |
| 905 for pe in process_ext_by_pid.itervalues(): | |
| 906 pe.process._heap_dump_version = self._heap_dump_version | |
| 907 if pe.process_mmaps_node: | |
| 908 # Now parse the most recent memory map. | |
| 909 pe.process._memory_map = MemoryMap(pe.process_mmaps_node) | |
| 910 self._processes.append(pe.process) | |
| 911 | |
  @property
  def node(self):
    """Root node of the trace JSON (the object passed to __init__).

    After ApplyModifications() this is what should be serialized back
    to the trace file.
    """
    return self._trace_node
| 916 | |
| 917 @property | |
| 918 def modified(self): | |
| 919 """Returns True if trace file needs to be updated. | |
| 920 | |
| 921 Before writing trace JSON back to a file ApplyModifications() needs | |
| 922 to be called. | |
| 923 """ | |
| 924 return any(p.modified for p in self._processes) | |
| 925 | |
  @property
  def processes(self):
    """List of processes parsed from the trace (one entry per pid)."""
    return self._processes
| 929 | |
  @property
  def heap_dump_version(self):
    """Heap dump format version seen in the trace (see _UseHeapDumpVersion),
    or None if the trace contained no heap dumps."""
    return self._heap_dump_version
| 933 | |
  @property
  def version(self):
    """Chrome version recorded in the trace; used to fetch matching
    symbols for official builds."""
    return self._version
| 937 | |
  @property
  def is_chromium(self):
    """True if the trace came from a Chromium (not Google Chrome) build,
    in which case local symbols are assumed to be present."""
    return self._is_chromium
| 941 | |
  @property
  def is_mac(self):
    """True if the trace was recorded on macOS."""
    return self._is_mac
| 945 | |
  @property
  def is_win(self):
    """True if the trace was recorded on Windows."""
    return self._is_win
| 949 | |
  @property
  def is_64bit(self):
    """True if the traced browser was a 64-bit build (selects win64
    symbol packages)."""
    return self._is_64bit
| 953 | |
| 954 def ApplyModifications(self): | |
| 955 """Propagates modifications back to the trace JSON.""" | |
| 956 for process in self._processes: | |
| 957 process.ApplyModifications() | |
| 958 assert not self.modified, 'still modified' | |
| 959 | |
  # Relevant trace event phases from Chromium's
  # src/base/trace_event/common/trace_event_common.h.
  # 'M' marks metadata events (process names, legacy stack frames);
  # 'v' marks periodic memory dump events.
  _EVENT_PHASE_METADATA = 'M'
  _EVENT_PHASE_MEMORY_DUMP = 'v'
| 964 | |
| 965 def _UseHeapDumpVersion(self, version): | |
| 966 if self._heap_dump_version is None: | |
| 967 self._heap_dump_version = version | |
| 968 return version | |
| 969 elif self._heap_dump_version != version: | |
| 970 raise Exception( | |
| 971 ("Inconsistent trace file: first saw '{}' heap dump version, " | |
| 972 "then '{}'.").format(self._heap_dump_version, version)) | |
| 973 else: | |
| 974 return version | |
| 975 | |
| 976 | |
class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  Acts as the bridge between a symbolizer and a trace file: it lists the
  addresses that need symbolization and the stack frames that should
  receive the results.
  """
  def __init__(self, file_path):
    # Path as recorded in the trace's memory map.
    self.path = file_path
    # Path actually handed to the symbolizer; may be remapped later
    # (e.g. to an unstripped library or downloaded symbol file).
    self.symbolizable_path = file_path
    # Maps module-relative address -> list of frames resolved to it.
    self.frames_by_address = collections.defaultdict(list)
| 988 | |
| 989 | |
def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into a list of SymbolizableFiles.

  Each stack frame PC is looked up in the owning process' memory map to
  find the module it belongs to, and the frame is filed under that
  module keyed by module-relative address. Frames whose PC falls outside
  every known region are named '<unresolved>' immediately.
  """
  symfile_by_path = {}
  for process in processes:
    memory_map = process.memory_map
    if not memory_map:
      continue
    for frame in process.stack_frame_map.frame_by_id.itervalues():
      pc = frame.pc
      if pc is None:
        continue
      region = memory_map.FindRegion(pc)
      if region is None:
        frame.name = '<unresolved>'
        continue

      path = region.file_path
      symfile = symfile_by_path.get(path)
      if symfile is None:
        symfile = SymbolizableFile(path)
        symfile_by_path[path] = symfile

      symfile.frames_by_address[pc - region.start_address].append(frame)
  return symfile_by_path.values()
| 1017 | |
| 1018 | |
def FindInSystemPath(binary_name):
  """Returns the full path of |binary_name| found via $PATH, or None."""
  for directory in os.environ['PATH'].split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    if os.path.isfile(candidate):
      return candidate
  return None
| 1026 | |
| 1027 | |
class Symbolizer(object):
  """Encapsulates platform-specific symbolization logic.

  Picks the symbolizer binary for the host platform at construction time
  and dispatches Symbolize() to the matching implementation.
  """

  def __init__(self):
    # Note: these flags describe the *host* platform, not necessarily the
    # platform the trace was recorded on.
    self.is_mac = sys.platform == 'darwin'
    self.is_win = sys.platform == 'win32'
    if self.is_mac:
      self.binary = 'atos'
      # _matcher exists only on Mac; _SymbolizeMac is its sole user.
      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
    elif self.is_win:
      self.binary = 'addr2line-pdb.exe'
    else:
      self.binary = 'addr2line'
    # None if the binary was not found in $PATH.
    self.symbolizer_path = FindInSystemPath(self.binary)

  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
    """Symbolizes |symfile| with addr2line via ELFSymbolizer.

    Frames whose symbol has no name fall back to |unsymbolized_name|.
    """
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
                                              self.symbolizer_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    for address, frames in symfile.frames_by_address.iteritems():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()


  def _SymbolizeMac(self, symfile):
    """Symbolizes |symfile| with one batched 'atos' invocation on Mac.

    Addresses are written to a temporary file and the atos output lines
    are matched back to frames by position.
    """
    load_address = (symbolize_trace_macho_reader.
                    ReadMachOTextLoadAddress(symfile.symbolizable_path))
    assert load_address is not None

    address_os_file, address_file_path = tempfile.mkstemp()
    try:
      with os.fdopen(address_os_file, 'w') as address_file:
        for address in symfile.frames_by_address.iterkeys():
          address_file.write('{:x} '.format(address + load_address))

      cmd = [self.symbolizer_path, '-arch', 'x86_64', '-l',
             '0x%x' % load_address, '-o', symfile.symbolizable_path,
             '-f', address_file_path]
      output_array = subprocess.check_output(cmd).split('\n')

      # Relies on the dict yielding keys (above) and values (below) in the
      # same order, which holds as long as the dict is not mutated between
      # the two iterations.
      for i, frames in enumerate(symfile.frames_by_address.itervalues()):
        symbolized_name = self._matcher.Match(output_array[i])
        for frame in frames:
          frame.name = symbolized_name
    finally:
      os.remove(address_file_path)

  def _SymbolizeWin(self, symfile):
    """Invoke symbolizer binary on windows and write all input in one go.

    Unlike linux, on windows, symbolization talks through a shared system
    service that handles communication with the NT symbol servers. This
    creates an explicit serialization (and therefore lock contention) of
    any process using the symbol API for files that do not have a local
    PDB.

    Thus, even though the windows symbolizer binary can be made command
    line compatible with the POSIX addr2line interface, parallelizing the
    symbolization does not yield the same performance effects. Running
    just one symbolizer seems good enough for now. Can optimize later
    if this becomes a bottleneck.
    """
    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
           symfile.symbolizable_path]

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                            stderr=sys.stderr)
    addrs = ["%x" % relative_pc for relative_pc in
             symfile.frames_by_address.keys()]
    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
    stdout_data = stdout_data.split('\n')

    # This is known to be in the same order as stderr_data.
    for i, addr in enumerate(addrs):
      for frame in symfile.frames_by_address[int(addr, 16)]:
        # Output of addr2line with --functions is always 2 outputs per
        # symbol, function name followed by source line number. Only grab
        # the function name as line info is not always available.
        frame.name = stdout_data[i * 2]

  def Symbolize(self, symfile, unsymbolized_name):
    """Dispatches to the platform-specific symbolization routine."""
    if self.is_mac:
      self._SymbolizeMac(symfile)
    elif self.is_win:
      self._SymbolizeWin(symfile)
    else:
      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)

  def IsSymbolizableFile(self, file_path):
    """Returns True if |file_path| looks like a binary we can symbolize.

    On Windows this is an extension check; elsewhere the 'file' utility
    is consulted for an ELF or Mach-O signature.
    """
    if self.is_win:
      extension = os.path.splitext(file_path)[1].lower()
      return extension in ['.dll', '.exe']
    else:
      # 'file -0' terminates the path portion with NUL, so the description
      # can be isolated regardless of what characters the path contains.
      result = subprocess.check_output(['file', '-0', file_path])
      type_string = result[result.find('\0') + 1:]
      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
                           type_string, re.DOTALL))
| 1140 | |
| 1141 | |
| 1142 def SymbolizeFiles(symfiles, symbolizer): | |
| 1143 """Symbolizes each file in the given list of SymbolizableFiles | |
| 1144 and updates stack frames with symbolization results.""" | |
| 1145 | |
| 1146 if not symfiles: | |
| 1147 print 'Nothing to symbolize.' | |
| 1148 return | |
| 1149 | |
| 1150 print 'Symbolizing...' | |
| 1151 | |
| 1152 def _SubPrintf(message, *args): | |
| 1153 print (' ' + message).format(*args) | |
| 1154 | |
| 1155 for symfile in symfiles: | |
| 1156 unsymbolized_name = '<{}>'.format( | |
| 1157 symfile.path if symfile.path else 'unnamed') | |
| 1158 | |
| 1159 problem = None | |
| 1160 if not os.path.isabs(symfile.symbolizable_path): | |
| 1161 problem = 'not a file' | |
| 1162 elif not os.path.isfile(symfile.symbolizable_path): | |
| 1163 problem = "file doesn't exist" | |
| 1164 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): | |
| 1165 problem = 'file is not symbolizable' | |
| 1166 if problem: | |
| 1167 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", | |
| 1168 len(symfile.frames_by_address), | |
| 1169 symfile.symbolizable_path, | |
| 1170 problem) | |
| 1171 for frames in symfile.frames_by_address.itervalues(): | |
| 1172 for frame in frames: | |
| 1173 frame.name = unsymbolized_name | |
| 1174 continue | |
| 1175 | |
| 1176 _SubPrintf('Symbolizing {} PCs from {}...', | |
| 1177 len(symfile.frames_by_address), | |
| 1178 symfile.symbolizable_path) | |
| 1179 | |
| 1180 symbolizer.Symbolize(symfile, unsymbolized_name) | |
| 1181 | |
| 1182 | |
# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so), plus incremental-install
# locations. Library name is available via 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:'
    r'app/[^/]+/lib/[^/]+/|'
    r'app-lib/[^/]+/|'
    r'data/[^/]+/incremental-install-files/lib/'
    r')(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
| 1195 | |
| 1196 | |
def HaveFilesFromAndroid(symfiles):
  """Returns True if any symfile path looks like an Android library."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
| 1199 | |
| 1200 | |
def RemapAndroidFiles(symfiles, output_path):
  """Points Android libraries at their unstripped copies in |output_path|.

  Paths that don't match the Android library pattern are clobbered so
  that SymbolizeFiles() reports them as 'not a file' instead of the
  misleading 'file not found'.
  """
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match is None:
      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
    else:
      symfile.symbolizable_path = os.path.join(
          output_path, ANDROID_UNSTRIPPED_SUBPATH, match.group('name'))
| 1213 | |
| 1214 | |
def RemapMacFiles(symfiles, symbol_base_directory, version):
  """Points the Chrome framework binary at downloaded dSYM symbols."""
  dwarf_suffix = ("Google Chrome Framework.dSYM/Contents/Resources/DWARF/"
                  "Google Chrome Framework")
  dwarf_path = os.path.join(symbol_base_directory, version, dwarf_suffix)

  for symfile in symfiles:
    if symfile.path.endswith("Google Chrome Framework"):
      symfile.symbolizable_path = dwarf_path
| 1224 | |
def RemapWinFiles(symfiles, symbol_base_directory, version, is64bit):
  """Points Windows binaries at downloaded images that have local PDBs."""
  platform_folder = "win64" if is64bit else "win"
  symbol_sub_dir = os.path.join(
      symbol_base_directory, "chrome-" + platform_folder + "-" + version)
  for symfile in symfiles:
    image = os.path.join(symbol_sub_dir, os.path.basename(symfile.path))
    # Only remap when both the image and its .pdb exist locally.
    if os.path.isfile(image) and os.path.isfile(image + ".pdb"):
      symfile.symbolizable_path = image
| 1234 | |
def Symbolize(options, trace, symbolizer):
  """Top-level driver: resolves, remaps, then symbolizes trace frames."""
  symfiles = ResolveSymbolizableFiles(trace.processes)

  # Android trace files carry no explicit platform marker, so detect
  # them by their characteristic library paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      sys.exit('The trace file appears to be from Android. Please '
               'specify output directory to properly symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  # Official Google Chrome builds need their symbols fetched separately;
  # point symfiles at the downloaded symbol locations.
  if not trace.is_chromium:
    if symbolizer.is_mac:
      RemapMacFiles(symfiles, options.symbol_base_directory, trace.version)
    if symbolizer.is_win:
      RemapWinFiles(symfiles, options.symbol_base_directory, trace.version,
                    trace.is_64bit)

  SymbolizeFiles(symfiles, symbolizer)
| 1255 | |
| 1256 | |
def OpenTraceFile(file_path, mode):
  """Opens |file_path| with the given mode, transparently handling .gz.

  Gzipped traces are opened in binary mode; plain traces in text mode.
  """
  if file_path.endswith('.gz'):
    return gzip.open(file_path, mode + 'b')
  return open(file_path, mode + 't')
| 1262 | |
| 1263 | |
def FetchAndExtractSymbolsMac(symbol_base_directory, version):
  """Ensures Mac symbols for |version| are present locally.

  Downloads the dSYM tarball from GCS if needed and extracts it into
  symbol_base_directory/<version>. Returns True iff symbols are (or
  already were) available.
  """
  def GetLocalPath(base_dir, version):
    # Local cache location of the downloaded tarball.
    return os.path.join(base_dir, version + ".tar.bz2")
  def GetSymbolsPath(version):
    # GCS object path; 'desktop-*' is a wildcard understood by cloud_storage.
    return "desktop-*/" + version + "/mac64/Google Chrome.dSYM.tar.bz2"
  def ExtractSymbolTarFile(symbol_sub_dir, symbol_tar_file):
    os.makedirs(symbol_sub_dir)
    # NOTE(review): extractall() performs no member-path sanitization; it
    # is trusted here because the archive comes from Google's own bucket.
    with tarfile.open(os.path.expanduser(symbol_tar_file), "r:bz2") as tar:
      tar.extractall(symbol_sub_dir)

  # Existing directory means symbols were fetched earlier - nothing to do.
  symbol_sub_dir = os.path.join(symbol_base_directory, version)
  if os.path.isdir(symbol_sub_dir):
    return True

  bzip_path = GetLocalPath(symbol_base_directory, version)
  if not os.path.isfile(bzip_path):

    _CLOUD_STORAGE_BUCKET = "chrome-unsigned"
    if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version)):
      print "Can't find symbols on GCS."
      return False
    print "Downloading symbols files from GCS, please wait."
    cloud_storage.Get(_CLOUD_STORAGE_BUCKET, GetSymbolsPath(version), bzip_path)

  ExtractSymbolTarFile(symbol_sub_dir, bzip_path)
  return True
| 1290 | |
| 1291 | |
| 1292 def FetchAndExtractSymbolsWin(symbol_base_directory, version, is64bit): | |
| 1293 def DownloadAndExtractZipFile(zip_path, source, destination): | |
| 1294 if not os.path.isfile(zip_path): | |
| 1295 _CLOUD_STORAGE_BUCKET = "chrome-unsigned" | |
| 1296 if not cloud_storage.Exists(_CLOUD_STORAGE_BUCKET, source): | |
| 1297 print "Can't find symbols on GCS." | |
| 1298 return False | |
| 1299 print "Downloading symbols files from GCS, please wait." | |
| 1300 cloud_storage.Get(_CLOUD_STORAGE_BUCKET, source, zip_path) | |
| 1301 if not os.path.isfile(zip_path): | |
| 1302 print "Can't download symbols on GCS." | |
| 1303 return False | |
| 1304 with zipfile.ZipFile(zip_path, "r") as zip: | |
| 1305 for member in zip.namelist(): | |
| 1306 filename = os.path.basename(member) | |
| 1307 # Skip directories. | |
| 1308 if not filename: | |
| 1309 continue | |
| 1310 # Extract archived files. | |
| 1311 source = zip.open(member) | |
| 1312 target = file(os.path.join(symbol_sub_dir, filename), "wb") | |
| 1313 with source, target: | |
| 1314 shutil.copyfileobj(source, target) | |
| 1315 | |
| 1316 folder = "win64" if is64bit else "win" | |
| 1317 gcs_folder = "desktop-*/" + version + "/" + folder + "-pgo/" | |
| 1318 | |
| 1319 symbol_sub_dir = os.path.join(symbol_base_directory, | |
| 1320 "chrome-" + folder + "-" + version) | |
| 1321 if os.path.isdir(symbol_sub_dir): | |
| 1322 return True | |
| 1323 | |
| 1324 os.makedirs(symbol_sub_dir) | |
| 1325 DownloadAndExtractZipFile( | |
| 1326 os.path.join(symbol_base_directory, | |
| 1327 "chrome-" + folder + "-" + version + "-syms.zip"), | |
| 1328 gcs_folder + "chrome-win32-syms.zip", | |
| 1329 symbol_sub_dir) | |
| 1330 DownloadAndExtractZipFile( | |
| 1331 os.path.join(symbol_base_directory, | |
| 1332 "chrome-" + folder + "-" + version + ".zip"), | |
| 1333 gcs_folder + "chrome-" + folder + "-pgo.zip", | |
| 1334 symbol_sub_dir) | |
| 1335 | |
| 1336 return True | |
| 1337 | |
# Suffix appended to the trace file name when creating a backup copy
# before the original is overwritten with symbolized data.
BACKUP_FILE_TAG = '.BACKUP'
| 1340 | |
| 1341 def main(): | |
| 1342 parser = argparse.ArgumentParser() | |
| 1343 parser.add_argument( | |
| 1344 'file', | |
| 1345 help='Trace file to symbolize (.json or .json.gz)') | |
| 1346 | |
| 1347 parser.add_argument( | |
| 1348 '--no-backup', dest='backup', default='true', action='store_false', | |
| 1349 help="Don't create {} files".format(BACKUP_FILE_TAG)) | |
| 1350 | |
| 1351 parser.add_argument( | |
| 1352 '--output-directory', | |
| 1353 help='The path to the build output directory, such as out/Debug.') | |
| 1354 | |
| 1355 home_dir = os.path.expanduser('~') | |
| 1356 default_dir = os.path.join(home_dir, "symbols") | |
| 1357 parser.add_argument( | |
| 1358 '--symbol-base-directory', | |
| 1359 default=default_dir, | |
| 1360 help='Directory where symbols are downloaded and cached.') | |
| 1361 | |
| 1362 symbolizer = Symbolizer() | |
| 1363 if symbolizer.symbolizer_path is None: | |
| 1364 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) | |
| 1365 | |
| 1366 options = parser.parse_args() | |
| 1367 | |
| 1368 trace_file_path = options.file | |
| 1369 | |
| 1370 print 'Reading trace file...' | |
| 1371 with OpenTraceFile(trace_file_path, 'r') as trace_file: | |
| 1372 trace = Trace(json.load(trace_file)) | |
| 1373 | |
| 1374 # Perform some sanity checks. | |
| 1375 if trace.is_win and sys.platform != 'win32': | |
| 1376 print "Cannot symbolize a windows trace on this architecture!" | |
| 1377 return False | |
| 1378 | |
| 1379 # If the trace is from Chromium, assume that symbols are already present. | |
| 1380 # Otherwise the trace is from Google Chrome. Assume that this is not a local | |
| 1381 # build of Google Chrome with symbols, and that we need to fetch symbols | |
| 1382 # from gcs. | |
| 1383 if not trace.is_chromium: | |
| 1384 has_symbols = False | |
| 1385 if symbolizer.is_mac: | |
| 1386 has_symbols = FetchAndExtractSymbolsMac(options.symbol_base_directory, | |
| 1387 trace.version) | |
| 1388 if symbolizer.is_win: | |
| 1389 has_symbols = FetchAndExtractSymbolsWin(options.symbol_base_directory, | |
| 1390 trace.version, trace.is_64bit) | |
| 1391 if not has_symbols: | |
| 1392 print 'Cannot fetch symbols from GCS' | |
| 1393 return False | |
| 1394 | |
| 1395 Symbolize(options, trace, symbolizer) | |
| 1396 | |
| 1397 if trace.modified: | |
| 1398 trace.ApplyModifications() | |
| 1399 | |
| 1400 if options.backup: | |
| 1401 backup_file_path = trace_file_path + BACKUP_FILE_TAG | |
| 1402 print 'Backing up trace file to {}'.format(backup_file_path) | |
| 1403 os.rename(trace_file_path, backup_file_path) | |
| 1404 | |
| 1405 print 'Updating the trace file...' | |
| 1406 with OpenTraceFile(trace_file_path, 'w') as trace_file: | |
| 1407 json.dump(trace.node, trace_file) | |
| 1408 else: | |
| 1409 print 'No modifications were made - not updating the trace file.' | |
| 1410 | |
| 1411 | |
# Script entry point.
if __name__ == '__main__':
  main()
| OLD | NEW |